diff options
author | Sven Gothel <[email protected]> | 2014-06-17 03:12:28 +0200 |
---|---|---|
committer | Sven Gothel <[email protected]> | 2014-06-17 03:12:28 +0200 |
commit | afb698ac3846713e5000226de9e28a9b06bf6f9a (patch) | |
tree | 83f795d0d9023ac022fe9b7af35f819125b79f17 /src/main/antlr/com/jogamp/gluegen | |
parent | c3054a01990e55ab35756ea23ab7d7c05f24dd37 (diff) | |
parent | 5d802fb8dd4004039d4597253712d24fffb90a36 (diff) |
Merge remote-tracking branch 'hharrison/antlr-move'
Diffstat (limited to 'src/main/antlr/com/jogamp/gluegen')
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/cgram/GnuCEmitter.g | 1145 | ||||
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/cgram/GnuCParser.g | 878 | ||||
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/cgram/GnuCTreeParser.g | 867 | ||||
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/cgram/HeaderParser.g | 785 | ||||
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/cgram/StdCParser.g | 1401 | ||||
-rw-r--r-- | src/main/antlr/com/jogamp/gluegen/jgram/JavaParser.g | 1315 |
6 files changed, 6391 insertions, 0 deletions
diff --git a/src/main/antlr/com/jogamp/gluegen/cgram/GnuCEmitter.g b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCEmitter.g new file mode 100644 index 0000000..38ded36 --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCEmitter.g @@ -0,0 +1,1145 @@ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + Copyright (c) Non, Inc. 1998 -- All Rights Reserved + +PROJECT: C Compiler +MODULE: GnuCEmitter +FILE: GnuCEmitter.g + +AUTHOR: Monty Zukowski ([email protected]) April 28, 1998 + +DESCRIPTION: + + This tree grammar is for a Gnu C AST. + It turns the tree back into source code. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + +header { + package com.jogamp.gluegen.cgram; + + import java.io.*; + import java.util.*; + + import antlr.CommonAST; + import antlr.DumpASTVisitor; +} + + +class GnuCEmitter extends GnuCTreeParser; + +options + { + importVocab = GNUC; + buildAST = false; + ASTLabelType = "TNode"; + + // Copied following options from java grammar. 
+ codeGenMakeSwitchThreshold = 2; + codeGenBitsetTestThreshold = 3; + } + + +{ + + +int tabs = 0; +PrintStream currentOutput = System.out; +int lineNum = 1; +String currentSource = ""; +LineObject trueSourceFile; +final int lineDirectiveThreshold = Integer.MAX_VALUE; +PreprocessorInfoChannel preprocessorInfoChannel = null; +Stack sourceFiles = new Stack(); + +public GnuCEmitter( PreprocessorInfoChannel preprocChannel ) +{ + preprocessorInfoChannel = preprocChannel; +} + +void initializePrinting() +{ + Vector preprocs = preprocessorInfoChannel.extractLinesPrecedingTokenNumber( Integer.valueOf(1) ); + printPreprocs(preprocs); +/* if ( currentSource.equals("") ) { + trueSourceFile = new LineObject(currentSource); + currentOutput.println("# 1 \"" + currentSource + "\"\n"); + sourceFiles.push(trueSourceFile); + } +*/ +} + +void finalizePrinting() { + // flush any leftover preprocessing instructions to the stream + + printPreprocs( + preprocessorInfoChannel.extractLinesPrecedingTokenNumber( + Integer.valueOf( preprocessorInfoChannel.getMaxTokenNumber() + 1 ) )); + //print a newline so file ends at a new line + currentOutput.println(); +} + +void printPreprocs( Vector preprocs ) +{ + // if there was a preprocessingDirective previous to this token then + // print a newline and the directive, line numbers handled later + if ( preprocs.size() > 0 ) { + if ( trueSourceFile != null ) { + currentOutput.println(); //make sure we're starting a new line unless this is the first line directive + } + lineNum++; + Enumeration e = preprocs.elements(); + while (e.hasMoreElements()) + { + Object o = e.nextElement(); + if ( o instanceof LineObject ) { // type test, not a getClass().getName() compare: this code is generated into package com.jogamp.gluegen.cgram, so the name is fully qualified and never equalled "LineObject" + LineObject l = (LineObject) o; + + // we always return to the trueSourceFile, we never enter it from another file + // force it to be returning if in fact we aren't currently in trueSourceFile + if (( trueSourceFile != null ) //trueSource exists + && ( !currentSource.equals(trueSourceFile.getSource()) ) //currently not in 
trueSource + && ( trueSourceFile.getSource().equals(l.getSource()) ) ) { //returning to trueSource + l.setEnteringFile( false ); + l.setReturningToFile( true ); + } + + + // print the line directive + currentOutput.println(l); + lineNum = l.getLine(); + currentSource = l.getSource(); + + + // the very first line directive always represents the true sourcefile + if ( trueSourceFile == null ) { + trueSourceFile = new LineObject(currentSource); + sourceFiles.push(trueSourceFile); + } + + // keep our own stack of files entered + if ( l.getEnteringFile() ) { + sourceFiles.push(l); + } + + // if returning to a file, pop the exited files off the stack + if ( l.getReturningToFile() ) { + LineObject top = (LineObject) sourceFiles.peek(); + while (( top != trueSourceFile ) && (! l.getSource().equals(top.getSource()) )) { + sourceFiles.pop(); + top = (LineObject) sourceFiles.peek(); + } + } + } + else { // it was a #pragma or such + currentOutput.println(o); + lineNum++; + } + } + } + +} + +void print( TNode t ) { + int tLineNum = t.getLocalLineNum(); + if ( tLineNum == 0 ) tLineNum = lineNum; + + Vector preprocs = preprocessorInfoChannel.extractLinesPrecedingTokenNumber((Integer)t.getAttribute("tokenNumber")); + printPreprocs(preprocs); + + if ( (lineNum != tLineNum) ) { + // we know we'll be newlines or a line directive or it probably + // is just the case that this token is on the next line + // either way start a new line and indent it + currentOutput.println(); + lineNum++; + printTabs(); + } + + if ( lineNum == tLineNum ){ + // do nothing special, we're at the right place + } + else { + int diff = tLineNum - lineNum; + if ( lineNum < tLineNum ) { + // print out the blank lines to bring us up to right line number + for ( ; lineNum < tLineNum ; lineNum++ ) { + currentOutput.println(); + } + printTabs(); + } + else { // just reset lineNum + lineNum = tLineNum; + } + } + currentOutput.print( t.getText() + " " ); +} + + +/* This was my attempt at being smart about line 
numbers + It didn't work quite right but I don't know why, I didn't + have enough test cases. Worked ok compiling rcs and ghostscript +*/ +void printAddingLineDirectives( TNode t ) { + int tLineNum = t.getLocalLineNum(); + String tSource = (String) t.getAttribute("source"); + + if ( tSource == null ) tSource = currentSource; + if ( tLineNum == 0 ) tLineNum = lineNum; + + Vector preprocs = preprocessorInfoChannel.extractLinesPrecedingTokenNumber((Integer)t.getAttribute("tokenNumber")); + printPreprocs(preprocs); + + if ( (lineNum != tLineNum) || !currentSource.equals(tSource) ) { + // we know we'll be newlines or a line directive or it probably + // is just the case that this token is on the next line + // either way start a new line and indent it + currentOutput.println(); + lineNum++; + printTabs(); + } + + if ( ( lineNum == tLineNum ) && ( currentSource.equals(tSource) ) ){ + // do nothing special, we're at the right place + } + else if ( currentSource.equals(tSource) ) { + int diff = tLineNum - lineNum; + if (diff > 0 && diff < lineDirectiveThreshold) { + // print out the blank lines to bring us up to right line number + for ( ; lineNum < tLineNum ; lineNum++ ) { + currentOutput.println(); + } + } + else { // print line directive to get us to right line number + // preserve flags 3 and 4 if present in current file + if ( ! 
sourceFiles.empty() ) { + LineObject l = (LineObject) sourceFiles.peek(); + StringBuilder tFlags = new StringBuilder(""); + if (l.getSystemHeader()) { + tFlags.append(" 3"); + } + if (l.getTreatAsC()) { + tFlags.append(" 4"); + } + currentOutput.println("# " + tLineNum + " \"" + tSource + "\"" + tFlags.toString()); + lineNum = tLineNum; + } + } + + printTabs(); + } + else { // different source + Enumeration sources = sourceFiles.elements(); + // see if we're returning to a file we entered earlier + boolean returningToEarlierFile = false; + while (sources.hasMoreElements()) { + LineObject l = (LineObject) sources.nextElement(); + if (l.getSource().equals(tSource)) { + returningToEarlierFile = true; + break; + } + } + if (returningToEarlierFile) { + // pop off the files we're exiting, but never pop the trueSourceFile + LineObject l = (LineObject) sourceFiles.peek(); + while ( ( l != trueSourceFile ) &&(! l.getSource().equals(tSource) ) ) { + sourceFiles.pop(); + l = (LineObject) sourceFiles.peek(); + } + + // put in the return flag, plus others as needed + StringBuilder tFlags = new StringBuilder(" 2"); + if (l.getSystemHeader()) { + tFlags.append(" 3"); + } + if (l.getTreatAsC()) { + tFlags.append(" 4"); + } + + currentOutput.println("# " + tLineNum + " \"" + tSource + "\"" + tFlags); + lineNum = tLineNum; + currentSource = tSource; + printTabs(); + } + else { // entering a file that wasn't in the original source + // pretend we're entering it from top of stack + currentOutput.println("# " + tLineNum + " \"" + tSource + "\"" + " 1"); + lineNum = tLineNum; + currentSource = tSource; + printTabs(); + } + } + currentOutput.print( t.getText() + " " ); +} + +/** It is not ok to print newlines from the String passed in as +it will screw up the line number handling **/ +void print( String s ) { + currentOutput.print( s + " " ); +} + +void printTabs() { + for ( int i = 0; i< tabs; i++ ) { + currentOutput.print( "\t" ); + } +} + +void commaSep( TNode t ) { + print( t ); + if 
( t.getNextSibling() != null ) { + print( "," ); + } +} + + int traceDepth = 0; + public void reportError(RecognitionException ex) { + if ( ex != null) { + System.err.println("ANTLR Tree Parsing RecognitionException Error: " + ex.getClass().getName() + " " + ex ); + ex.printStackTrace(System.err); + } + } + public void reportError(NoViableAltException ex) { + System.err.println("ANTLR Tree Parsing NoViableAltException Error: " + ex.toString()); + TNode.printTree( ex.node ); + ex.printStackTrace(System.err); + } + public void reportError(MismatchedTokenException ex) { + if ( ex != null) { + TNode.printTree( ex.node ); + System.err.println("ANTLR Tree Parsing MismatchedTokenException Error: " + ex ); + ex.printStackTrace(System.err); + } + } + public void reportError(String s) { + System.err.println("ANTLR Error from String: " + s); + } + public void reportWarning(String s) { + System.err.println("ANTLR Warning from String: " + s); + } + protected void match(AST t, int ttype) throws MismatchedTokenException { + //System.out.println("match("+ttype+"); cursor is "+t); + super.match(t, ttype); + } + public void match(AST t, BitSet b) throws MismatchedTokenException { + //System.out.println("match("+b+"); cursor is "+t); + super.match(t, b); + } + protected void matchNot(AST t, int ttype) throws MismatchedTokenException { + //System.out.println("matchNot("+ttype+"); cursor is "+t); + super.matchNot(t, ttype); + } + public void traceIn(String rname, AST t) { + traceDepth += 1; + for (int x=0; x<traceDepth; x++) System.out.print(" "); + super.traceIn(rname, t); + } + public void traceOut(String rname, AST t) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + super.traceOut(rname, t); + traceDepth -= 1; + } + + + +} + + +translationUnit options { + defaultErrorHandler=false; +} + : + { initializePrinting(); } + ( externalList )? 
+ { finalizePrinting(); } + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + +externalList + : ( externalDef )+ + ; + + +externalDef + : declaration + | functionDef + | asm_expr + | typelessDeclaration + | s:SEMI { print( s ); } + ; + +typelessDeclaration + : #(NTypeMissing initDeclList s: SEMI) { print( s ); } + ; + + + +asm_expr + : #( a:"asm" { print( a ); } + ( v:"volatile" { print( v ); } + )? + lc:LCURLY { print( lc ); tabs++; } + expr + rc:RCURLY { tabs--; print( rc ); } + s:SEMI { print( s ); } + ) + ; + + +declaration + : #( NDeclaration + declSpecifiers + ( + initDeclList + )? + ( s:SEMI { print( s ); } )+ + ) + ; + + +declSpecifiers + : ( storageClassSpecifier + | typeQualifier + | typeSpecifier + )+ + ; + +storageClassSpecifier + : a:"auto" { print( a ); } + | b:"register" { print( b ); } + | c:"typedef" { print( c ); } + | functionStorageClassSpecifier + ; + + +functionStorageClassSpecifier + : a:"extern" { print( a ); } + | b:"static" { print( b ); } + | c:"inline" { print( c ); } + ; + + +typeQualifier + : a:"const" { print( a ); } + | b:"volatile" { print( b ); } + ; + + +typeSpecifier + : a:"void" { print( a ); } + | b:"char" { print( b ); } + | c:"short" { print( c ); } + | d:"int" { print( d ); } + | e:"long" { print( e ); } + | f:"float" { print( f ); } + | g:"double" { print( g ); } + | h:"signed" { print( h ); } + | i:"unsigned" { print( i ); } + | structSpecifier ( attributeDecl )* + | unionSpecifier ( attributeDecl )* + | enumSpecifier + | typedefName + | #(n:"typeof" lp:LPAREN { print( n ); print( lp ); } + ( (typeName )=> typeName + | expr + ) + rp:RPAREN { print( rp ); } + ) + | p:"__complex" { print( p ); } + ; + + +typedefName + : #(NTypedefName i:ID { print( i ); } ) + ; + + +structSpecifier + : #( a:"struct" { print( a ); } + structOrUnionBody + ) + ; + +unionSpecifier + : #( a:"union" { print( 
a ); } + structOrUnionBody + ) + ; + +structOrUnionBody + : ( (ID LCURLY) => i1:ID lc1:LCURLY { print( i1 ); print ( "{" ); tabs++; } + ( structDeclarationList )? + rc1:RCURLY { tabs--; print( rc1 ); } + | lc2:LCURLY { print( lc2 ); tabs++; } + ( structDeclarationList )? + rc2:RCURLY { tabs--; print( rc2 ); } + | i2:ID { print( i2 ); } + ) + ; + +structDeclarationList + : ( structDeclaration { print( ";" ); } + )+ + ; + + +structDeclaration + : specifierQualifierList structDeclaratorList + ; + + +specifierQualifierList + : ( + typeSpecifier + | typeQualifier + )+ + ; + + +structDeclaratorList + : structDeclarator + ( { print(","); } structDeclarator )* + ; + + +structDeclarator + : + #( NStructDeclarator + ( declarator )? + ( c:COLON { print( c ); } expr )? + ( attributeDecl )* + ) + ; + + +enumSpecifier + : #( a:"enum" { print( a ); } + ( i:ID { print( i ); } )? + ( lc:LCURLY { print( lc ); tabs++; } + enumList + rc:RCURLY { tabs--; print( rc ); } + )? + ) + ; + + +enumList + : + enumerator ( {print(",");} enumerator)* + ; + + +enumerator + : i:ID { print( i ); } + ( b:ASSIGN { print( b ); } + expr + )? + ; + + +attributeDecl: + #( a:"__attribute" { print( a ); } + (b:. { print( b ); } )* + ) + | #( n:NAsmAttribute { print( n ); } + lp:LPAREN { print( lp ); } + expr // no literal ")" here: the RPAREN child below already emits the closing paren + rp:RPAREN { print( rp ); } + ) + ; + +initDeclList + : initDecl + ( { print( "," ); } initDecl )* + ; + + +initDecl + { String declName = ""; } + : #(NInitDecl + declarator + ( attributeDecl )* + ( a:ASSIGN { print( a ); } + initializer + | b:COLON { print( b ); } + expr + )? + ) + ; + + +pointerGroup + : #( NPointerGroup + ( a:STAR { print( a ); } + ( typeQualifier )* + )+ + ) + ; + + + +idList + : i:ID { print( i ); } + ( c:COMMA { print( c ); } + id:ID { print( id ); } + )* + ; + + + +initializer + : #( NInitializer (initializerElementLabel)? 
expr ) + | lcurlyInitializer + ; + +initializerElementLabel + : #( NInitializerElementLabel + ( + ( l:LBRACKET { print( l ); } + expr + r:RBRACKET { print( r ); } + (a1:ASSIGN { print( a1 ); } )? + ) + | i1:ID c:COLON { print( i1 ); print( c ); } + | d:DOT i2:ID a2:ASSIGN { print( d ); print( i2 ); print( a2 ); } + ) + ) + ; + +lcurlyInitializer + : #(n:NLcurlyInitializer { print( n ); tabs++; } + initializerList + rc:RCURLY { tabs--; print( rc ); } + ) + ; + +initializerList + : ( i:initializer { commaSep( i ); } + )* + ; + + +declarator + : #( NDeclarator + ( pointerGroup )? + + ( id:ID { print( id ); } + | lp:LPAREN { print( lp ); } declarator rp:RPAREN { print( rp ); } + ) + + ( #( n:NParameterTypeList { print( n ); } + ( + parameterTypeList + | (idList)? + ) + r:RPAREN { print( r ); } + ) + | lb:LBRACKET { print( lb );} ( expr )? rb:RBRACKET { print( rb ); } + )* + ) + ; + + + +parameterTypeList + : ( parameterDeclaration + ( c:COMMA { print( c ); } + | s:SEMI { print( s ); } + )? + )+ + ( v:VARARGS { print( v ); } )? + ; + + + +parameterDeclaration + : #( NParameterDeclaration + declSpecifiers + (declarator | nonemptyAbstractDeclarator)? + ) + ; + + +functionDef + : #( NFunctionDef + ( functionDeclSpecifiers)? 
+ declarator + (declaration + | v:VARARGS { print( v ); } + )* + compoundStatement + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + +functionDeclSpecifiers + : + ( functionStorageClassSpecifier + | typeQualifier + | typeSpecifier + )+ + ; + +declarationList + : + ( //ANTLR doesn't know that declarationList properly eats all the declarations + //so it warns about the ambiguity + options { + warnWhenFollowAmbig = false; + } : + localLabelDecl + | declaration + )+ + ; + +localLabelDecl + : #(a:"__label__" { print( a ); } + ( i:ID { commaSep( i ); } + )+ + { print( ";" ); } + ) + ; + + + +compoundStatement + : #( cs:NCompoundStatement { print( cs ); tabs++; } + ( declarationList + | functionDef + )* + ( statementList )? + rc:RCURLY { tabs--; print( rc ); } + ) + + ; + +statementList + : ( statement )+ + ; + +statement + : statementBody + ; + +statementBody + : s:SEMI { print( s ); } + + | compoundStatement // Group of statements + + | #(NStatementExpr + expr { print( ";" ); } + ) // Expressions + +// Iteration statements: + + | #( w:"while" { print( w ); print( "(" ); } + expr { print( ")" ); } + statement ) + + | #( d:"do" { print( d ); } + statement + { print( " while ( " ); } + expr + { print( " );" ); } + ) + + | #( f:"for" { print( f ); print( "(" ); } + expr { print( ";" ); } + expr { print( ";" ); } + expr { print( ")" ); } + statement + ) + + +// Jump statements: + + | #( g:"goto" { print( g );} + expr { print( ";" ); } + ) + | c:"continue" { print( c ); print( ";" );} + | b:"break" { print( b ); print( ";" );} + | #( r:"return" { print( r ); } + ( expr )? + { print( ";" ); } + ) + + +// Labeled statements: + | #( NLabel + ni:ID { print( ni ); print( ":" ); } + ( statement )? + ) + + | #( + ca:"case" { print( ca ); } + expr { print( ":" ); } + (statement)? 
+ ) + + | #( + de:"default" { print( de ); print( ":" ); } + (statement)? + ) + + + +// Selection statements: + + | #( i:"if" { print( i ); print( "(" ); } + expr { print( ")" ); } + statement + ( e:"else" { print( e ); } + statement + )? + ) + | #( sw:"switch" { print( sw ); print( "(" ); } + expr { print( ")" ); } + statement + ) + + + + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + + + + +expr + : + binaryExpr + | conditionalExpr + | castExpr + | unaryExpr + | postfixExpr + | primaryExpr + | emptyExpr + | compoundStatementExpr + | initializer + | rangeExpr + | gnuAsmExpr + ; + +emptyExpr + : NEmptyExpression + ; + +compoundStatementExpr + : #(l:LPAREN { print( l ); } + compoundStatement + r:RPAREN { print( r ); } + ) + ; + +rangeExpr + : #(NRangeExpr expr v:VARARGS{ print( v ); } expr) + ; + +gnuAsmExpr + : #(n:NGnuAsmExpr { print( n ); } + (v:"volatile" { print( v ); } )? + lp:LPAREN { print( lp ); } + stringConst + ( options { warnWhenFollowAmbig = false; }: + c1:COLON { print( c1 );} + (strOptExprPair + ( c2:COMMA { print( c2 ); } strOptExprPair)* + )? + ( options { warnWhenFollowAmbig = false; }: + c3:COLON { print( c3 ); } + (strOptExprPair + ( c4:COMMA { print( c4 ); } strOptExprPair)* + )? + )? + )? + ( c5:COLON { print( c5 ); } + stringConst + ( c6:COMMA { print( c6 ); } + stringConst + )* + )? + rp:RPAREN { print( rp ); } + ) + ; + +strOptExprPair + : stringConst + ( + l:LPAREN { print( l ); } + expr + r:RPAREN { print( r ); } + )? 
+ ; + +binaryOperator + : ASSIGN + | DIV_ASSIGN + | PLUS_ASSIGN + | MINUS_ASSIGN + | STAR_ASSIGN + | MOD_ASSIGN + | RSHIFT_ASSIGN + | LSHIFT_ASSIGN + | BAND_ASSIGN + | BOR_ASSIGN + | BXOR_ASSIGN + | LOR + | LAND + | BOR + | BXOR + | BAND + | EQUAL + | NOT_EQUAL + | LT + | LTE + | GT + | GTE + | LSHIFT + | RSHIFT + | PLUS + | MINUS + | STAR + | DIV + | MOD + | NCommaExpr + ; + +binaryExpr + : b:binaryOperator + // no rules allowed as roots, so here I manually get + // the first and second children of the binary operator + // and then print them out in the right order + { TNode e1, e2; + e1 = (TNode) b.getFirstChild(); + e2 = (TNode) e1.getNextSibling(); + expr( e1 ); + print( b ); + expr( e2 ); + } + + ; + + +conditionalExpr + : #( q:QUESTION + expr { print( q ); } + ( expr )? + c:COLON { print( c ); } + expr + ) + ; + + +castExpr + : #( + c:NCast { print( c ); } + typeName + rp:RPAREN { print( rp ); } + expr + ) + ; + + +typeName + : specifierQualifierList (nonemptyAbstractDeclarator)? + ; + +nonemptyAbstractDeclarator + : #( NNonemptyAbstractDeclarator + ( pointerGroup + ( (lp1:LPAREN { print( lp1 ); } + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + rp1:RPAREN { print( rp1 ); } + ) + | ( + lb1:LBRACKET { print( lb1 ); } + (expr)? + rb1:RBRACKET { print( rb1 ); } + ) + )* + + | ( (lp2:LPAREN { print( lp2 ); } + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + rp2:RPAREN { print( rp2 ); } + ) + | ( + lb2:LBRACKET { print( lb2 ); } + (expr)? 
+ rb2:RBRACKET { print( rb2 ); } + ) + )+ + ) + ) + ; + + + +unaryExpr + : #( i:INC { print( i ); } expr ) + | #( d:DEC { print( d ); } expr ) + | #( NUnaryExpr u:unaryOperator { print( u ); } expr) + | #( s:"sizeof" { print( s ); } + ( ( LPAREN typeName )=> + lps:LPAREN { print( lps ); } + typeName + rps:RPAREN { print( rps ); } + | expr + ) + ) + | #( a:"__alignof" { print( a ); } + ( ( LPAREN typeName )=> + lpa:LPAREN { print( lpa ); } + typeName + rpa:RPAREN { print( rpa ); } + | expr + ) + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + unaryOperator + : BAND + | STAR + | PLUS + | MINUS + | BNOT + | LNOT + | LAND + | "__real" + | "__imag" + ; + + +postfixExpr + : #( NPostfixExpr + primaryExpr + ( a:PTR b:ID { print( a ); print( b ); } + | c:DOT d:ID { print( c ); print( d ); } + | #( n:NFunctionCallArgs { print( n ); } + (argExprList)? + rp:RPAREN { print( rp ); } + ) + | lb:LBRACKET { print( lb ); } + expr + rb:RBRACKET { print( rb ); } + | f:INC { print( f ); } + | g:DEC { print( g ); } + )+ + ) + ; + + + +primaryExpr + : i:ID { print( i ); } + | n:Number { print( n ); } + | charConst + | stringConst + +// JTC: +// ID should catch the enumerator +// leaving it in gives ambiguous err +// | enumerator + + | #( eg:NExpressionGroup { print( eg ); } + expr { print( ")" ); } + ) + ; + + + +argExprList + : expr ( {print( "," );} expr )* + ; + + + +protected +charConst + : c:CharLiteral { print( c ); } + ; + + +protected +stringConst + : #( NStringSeq + ( + s:StringLiteral { print( s ); } + )+ + ) + ; + + +protected +intConst + : IntOctalConst + | LongOctalConst + | UnsignedOctalConst + | IntIntConst + | LongIntConst + | UnsignedIntConst + | IntHexConst + | LongHexConst + | UnsignedHexConst + ; + + +protected +floatConst + : FloatDoubleConst + | DoubleDoubleConst + | LongDoubleConst + ; + + + + + + + + + + diff --git 
a/src/main/antlr/com/jogamp/gluegen/cgram/GnuCParser.g b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCParser.g new file mode 100644 index 0000000..e8ca8c5 --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCParser.g @@ -0,0 +1,878 @@ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + Copyright (c) Non, Inc. 1998 -- All Rights Reserved + +PROJECT: C Compiler +MODULE: GnuCParser +FILE: GnuCParser.g + +AUTHOR: Monty Zukowski ([email protected]) April 28, 1998 + +DESCRIPTION: + This is a grammar for the GNU C compiler. It is a + grammar subclass of StdCParser, overriding only those + rules which are different from Standard C. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + +header { + package com.jogamp.gluegen.cgram; + + import java.io.*; + + import antlr.CommonAST; + import antlr.DumpASTVisitor; +} + + +class GnuCParser extends StdCParser; + +options + { + k = 2; + exportVocab = GNUC; + buildAST = true; + ASTLabelType = "TNode"; + + // Copied following options from java grammar. + codeGenMakeSwitchThreshold = 2; + codeGenBitsetTestThreshold = 3; + } + + +{ + // Suppport C++-style single-line comments? 
+ public static boolean CPPComments = true; + + // access to symbol table + public CSymbolTable symbolTable = new CSymbolTable(); + + // source for names to unnamed scopes + protected int unnamedScopeCounter = 0; + + public boolean isTypedefName(String name) { + boolean returnValue = false; + TNode node = symbolTable.lookupNameInCurrentScope(name); + for (; node != null; node = (TNode) node.getNextSibling() ) { + if(node.getType() == LITERAL_typedef) { + returnValue = true; + break; + } + } + return returnValue; + } + + + public String getAScopeName() { + return "" + (unnamedScopeCounter++); + } + + public void pushScope(String scopeName) { + symbolTable.pushScope(scopeName); + } + + public void popScope() { + symbolTable.popScope(); + } + + int traceDepth = 0; + public void reportError(RecognitionException ex) { + try { + System.err.println("ANTLR Parsing Error: "+ex + " token name:" + tokenNames[LA(1)]); + ex.printStackTrace(System.err); + } + catch (TokenStreamException e) { + System.err.println("ANTLR Parsing Error: "+ex); + ex.printStackTrace(System.err); + } + } + public void reportError(String s) { + System.err.println("ANTLR Parsing Error from String: " + s); + } + public void reportWarning(String s) { + System.err.println("ANTLR Parsing Warning from String: " + s); + } + public void match(int t) throws MismatchedTokenException { + boolean debugging = false; + + if ( debugging ) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("Match("+tokenNames[t]+") with LA(1)="+ + tokenNames[LA(1)] + ((inputState.guessing>0)?" [inputState.guessing "+ inputState.guessing + "]":"")); + } + catch (TokenStreamException e) { + System.out.println("Match("+tokenNames[t]+") " + ((inputState.guessing>0)?" 
[inputState.guessing "+ inputState.guessing + "]":"")); + + } + + } + try { + if ( LA(1)!=t ) { + if ( debugging ){ + for (int x=0; x<traceDepth; x++) System.out.print(" "); + System.out.println("token mismatch: "+tokenNames[LA(1)] + + "!="+tokenNames[t]); + } + throw new MismatchedTokenException(tokenNames, LT(1), t, false, getFilename()); + + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } + } + catch (TokenStreamException e) { + } + + } + public void traceIn(String rname) { + traceDepth += 1; + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("> "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") " + LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + } + public void traceOut(String rname) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("< "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") "+LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + traceDepth -= 1; + } + +} + + +translationUnit + : ( externalList )? /* Empty source files are allowed. */ + ; +asm_expr + : "asm"^ + ("volatile")? LCURLY expr RCURLY ( SEMI )+ + ; + +idList + : ID ( options{warnWhenFollowAmbig=false;}: COMMA ID )* + ; + +externalDef + : ( "typedef" | declaration )=> declaration + | ( functionPrefix )=> functionDef + | typelessDeclaration + | asm_expr + | SEMI + ; + +/* these two are here because GCC allows "cat = 13;" as a valid program! */ +functionPrefix + { String declName; } + : ( (functionDeclSpecifiers)=> ds:functionDeclSpecifiers + | //epsilon + ) + declName = d:declarator[true] + ( declaration )* (VARARGS)? 
( SEMI )* + LCURLY + ; + +typelessDeclaration + { AST typeMissing = #[NTypeMissing]; } + : initDeclList[typeMissing] SEMI { ## = #( #[NTypeMissing], ##); } + ; + +initializer + : ( ( ( (initializerElementLabel)=> initializerElementLabel )? + ( assignExpr | lcurlyInitializer ) { ## = #( #[NInitializer], ## ); } + ) + | lcurlyInitializer + ) + ; + +// GCC allows more specific initializers +initializerElementLabel + : ( ( LBRACKET ((constExpr VARARGS)=> rangeExpr | constExpr) RBRACKET (ASSIGN)? ) + | ID COLON + | DOT ID ASSIGN + ) + { ## = #( #[NInitializerElementLabel], ##) ; } + ; + +// GCC allows empty initializer lists +lcurlyInitializer + : + LCURLY^ (initializerList ( COMMA! )? )? RCURLY + { ##.setType( NLcurlyInitializer ); } + ; + +initializerList + : initializer ( options{warnWhenFollowAmbig=false;}:COMMA! initializer )* + ; + + +declarator[boolean isFunctionDefinition] returns [String declName] + { declName = ""; } + : + ( pointerGroup )? + + ( id:ID { declName = id.getText(); } + | LPAREN declName = declarator[false] RPAREN + ) + + ( declaratorParamaterList[isFunctionDefinition, declName] + | LBRACKET ( expr )? RBRACKET + )* + { ## = #( #[NDeclarator], ## ); } + ; + +declaratorParamaterList[boolean isFunctionDefinition, String declName] + : + LPAREN^ + { + if (isFunctionDefinition) { + pushScope(declName); + } + else { + pushScope("!"+declName); + } + } + ( + (declSpecifiers)=> parameterTypeList + | (idList)? + ) + { + popScope(); + } + ( COMMA! )? + RPAREN + { ##.setType(NParameterTypeList); } + ; + +parameterTypeList + : parameterDeclaration + ( options { + warnWhenFollowAmbig = false; + } : + ( COMMA | SEMI ) + parameterDeclaration + )* + ( ( COMMA | SEMI ) + VARARGS + )? 
+ ; + + +declarationList + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + + localLabelDeclaration + | ( declarationPredictor )=> declaration + )+ + ; +localLabelDeclaration + : ( //GNU note: any __label__ declarations must come before regular declarations. + "__label__"^ ID (options{warnWhenFollowAmbig=false;}: COMMA! ID)* ( COMMA! )? ( SEMI! )+ + ) + ; + + +declaration + { AST ds1 = null; } + : ds:declSpecifiers { ds1 = astFactory.dupList(#ds); } + ( + initDeclList[ds1] + )? + ( SEMI )+ + { ## = #( #[NDeclaration], ##); } + + ; + +functionStorageClassSpecifier + : "extern" + | "static" + | "inline" + ; + +typeSpecifier [int specCount] returns [int retSpecCount] + { retSpecCount = specCount + 1; } + : + ( "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | "int8_t" + | "uint8_t" + | "int16_t" + | "uint16_t" + | "__int32" + | "int32_t" + | "wchar_t" + | "uint32_t" + | "__int64" + | "int64_t" + | "uint64_t" + | "ptrdiff_t" + | "intptr_t" + | "size_t" + | "uintptr_t" + | structOrUnionSpecifier ( options{warnWhenFollowAmbig=false;}: attributeDecl )* + | enumSpecifier + | { specCount==0 }? typedefName + | "typeof"^ LPAREN + ( ( typeName )=> typeName + | expr + ) + RPAREN + | "__complex" + ) + ; + + +structOrUnionSpecifier + { String scopeName; } + : sou:structOrUnion! + ( ( ID LCURLY )=> i:ID l:LCURLY + { + scopeName = #sou.getText() + " " + #i.getText(); + #l.setText(scopeName); + pushScope(scopeName); + } + ( structDeclarationList )? + { popScope();} + RCURLY + | l1:LCURLY + { + scopeName = getAScopeName(); + #l1.setText(scopeName); + pushScope(scopeName); + } + ( structDeclarationList )? + { popScope(); } + RCURLY + | ID + ) + { + ## = #( #sou, ## ); + } + ; + + +structDeclaration + : specifierQualifierList structDeclaratorList ( COMMA! )? ( SEMI! 
)+ + ; + +structDeclaratorList + : structDeclarator ( options{warnWhenFollowAmbig=false;}: COMMA! structDeclarator )* + ; + +structDeclarator + : ( declarator[false] )? + ( COLON constExpr )? + ( attributeDecl )* + { ## = #( #[NStructDeclarator], ##); } + ; + + + +enumSpecifier + : "enum"^ + ( ( ID LCURLY )=> i:ID LCURLY enumList[i.getText()] RCURLY + | LCURLY enumList["anonymous"] RCURLY + | ID + ) + ; +enumList[String enumName] + : enumerator[enumName] ( options{warnWhenFollowAmbig=false;}: COMMA! enumerator[enumName] )* ( COMMA! )? + ; + + +initDeclList[AST declarationSpecifiers] + : initDecl[declarationSpecifiers] + ( options{warnWhenFollowAmbig=false;}: COMMA! initDecl[declarationSpecifiers] )* + ( COMMA! )? + ; + +initDecl[AST declarationSpecifiers] + { String declName = ""; } + : declName = d:declarator[false] + { AST ds1, d1; + ds1 = astFactory.dupList(declarationSpecifiers); + d1 = astFactory.dupList(#d); + symbolTable.add(declName, #(null, ds1, d1) ); + } + ( attributeDecl )* + ( ASSIGN initializer + | COLON expr + )? + { ## = #( #[NInitDecl], ## ); } + ; + +attributeDecl + : "__attribute"^ LPAREN LPAREN attributeList RPAREN RPAREN + | "asm"^ LPAREN stringConst RPAREN { ##.setType( NAsmAttribute ); } + ; + +attributeList + : attribute ( options{warnWhenFollowAmbig=false;}: COMMA attribute)* ( COMMA )? + ; + +attribute + : ( ~(LPAREN | RPAREN | COMMA) + | LPAREN attributeList RPAREN + )* + ; +compoundStatement[String scopeName] + : LCURLY^ + + { + pushScope(scopeName); + } + ( //this ambiguity is ok, declarationList and nestedFunctionDef end properly + options { + warnWhenFollowAmbig = false; + } : + ( "typedef" | "__label__" | declaration )=> declarationList + | (nestedFunctionDef)=> nestedFunctionDef + )* + ( statementList )? + { popScope(); } + RCURLY + { ##.setType( NCompoundStatement ); ##.setAttribute( "scopeName", scopeName ); } + ; + +nestedFunctionDef + { String declName; } + : ( "auto" )? 
//only for nested functions + ( (functionDeclSpecifiers)=> ds:functionDeclSpecifiers + )? + declName = d:declarator[false] + { + AST d2, ds2; + d2 = astFactory.dupList(#d); + ds2 = astFactory.dupList(#ds); + symbolTable.add(declName, #(null, ds2, d2)); + pushScope(declName); + } + ( declaration )* + { popScope(); } + compoundStatement[declName] + { ## = #( #[NFunctionDef], ## );} + ; + +statement + : SEMI // Empty statements + + | compoundStatement[getAScopeName()] // Group of statements + + | expr SEMI! { ## = #( #[NStatementExpr], ## );} // Expressions + +// Iteration statements: + + | "while"^ LPAREN! expr RPAREN! statement + | "do"^ statement "while"! LPAREN! expr RPAREN! SEMI! + |! "for" + LPAREN ( e1:expr )? SEMI ( e2:expr )? SEMI ( e3:expr )? RPAREN + s:statement + { + if ( #e1 == null) { #e1 = (TNode) #[ NEmptyExpression ]; } + if ( #e2 == null) { #e2 = (TNode) #[ NEmptyExpression ]; } + if ( #e3 == null) { #e3 = (TNode) #[ NEmptyExpression ]; } + ## = #( #[LITERAL_for, "for"], #e1, #e2, #e3, #s ); + } + + +// Jump statements: + + | "goto"^ expr SEMI! + | "continue" SEMI! + | "break" SEMI! + | "return"^ ( expr )? SEMI! + + + | ID COLON! (options {warnWhenFollowAmbig=false;}: statement)? { ## = #( #[NLabel], ## ); } +// GNU allows range expressions in case statements + | "case"^ ((constExpr VARARGS)=> rangeExpr | constExpr) COLON! ( options{warnWhenFollowAmbig=false;}:statement )? + | "default"^ COLON! ( options{warnWhenFollowAmbig=false;}: statement )? + +// Selection statements: + + | "if"^ + LPAREN! expr RPAREN! statement + ( //standard if-else ambiguity + options { + warnWhenFollowAmbig = false; + } : + "else" statement )? + | "switch"^ LPAREN! expr RPAREN! statement + ; + + + +conditionalExpr + : logicalOrExpr + ( QUESTION^ (expr)? COLON conditionalExpr )? 
+ ; + +rangeExpr //used in initializers only + : constExpr VARARGS constExpr + { ## = #(#[NRangeExpr], ##); } + ; + +castExpr + : ( LPAREN typeName RPAREN )=> + LPAREN^ typeName RPAREN ( castExpr | lcurlyInitializer ) + { ##.setType(NCast); } + + | unaryExpr + ; +nonemptyAbstractDeclarator + : ( + pointerGroup + ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + ( COMMA! )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )* + + | ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + ( COMMA! )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )+ + ) + { ## = #( #[NNonemptyAbstractDeclarator], ## ); } + + ; + + + +unaryExpr + : postfixExpr + | INC^ castExpr + | DEC^ castExpr + | u:unaryOperator castExpr { ## = #( #[NUnaryExpr], ## ); } + + | "sizeof"^ + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | unaryExpr + ) + | "__alignof"^ + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | unaryExpr + ) + | gnuAsmExpr + ; + +unaryOperator + : BAND + | STAR + | PLUS + | MINUS + | BNOT //also stands for complex conjugation + | LNOT + | LAND //for label dereference (&&label) + | "__real" + | "__imag" + ; + +gnuAsmExpr + : "asm"^ ("volatile")? + LPAREN stringConst + ( options { warnWhenFollowAmbig = false; }: + COLON (strOptExprPair ( COMMA strOptExprPair)* )? + ( options { warnWhenFollowAmbig = false; }: + COLON (strOptExprPair ( COMMA strOptExprPair)* )? + )? + )? + ( COLON stringConst ( COMMA stringConst)* )? + RPAREN + { ##.setType(NGnuAsmExpr); } + ; + +//GCC requires the PARENs +strOptExprPair + : stringConst ( LPAREN expr RPAREN )? 
+ ; + + +primaryExpr + : ID + | Number + | charConst + | stringConst +// JTC: +// ID should catch the enumerator +// leaving it in gives ambiguous err +// | enumerator + | (LPAREN LCURLY) => LPAREN^ compoundStatement[getAScopeName()] RPAREN + | LPAREN^ expr RPAREN { ##.setType(NExpressionGroup); } + ; + + +{ + import java.io.*; + import java.util.*; + import antlr.*; +} + +class GnuCLexer extends StdCLexer; +options + { + k = 3; + importVocab = GNUC; + testLiterals = false; + } +tokens { + LITERAL___extension__ = "__extension__"; +} + +{ + public void initialize(String src) + { + setOriginalSource(src); + initialize(); + } + + public void initialize() + { + literals.put(new ANTLRHashString("__alignof__", this), new Integer(LITERAL___alignof)); + literals.put(new ANTLRHashString("__asm", this), new Integer(LITERAL_asm)); + literals.put(new ANTLRHashString("__asm__", this), new Integer(LITERAL_asm)); + literals.put(new ANTLRHashString("__attribute__", this), new Integer(LITERAL___attribute)); + literals.put(new ANTLRHashString("__complex__", this), new Integer(LITERAL___complex)); + literals.put(new ANTLRHashString("__const", this), new Integer(LITERAL_const)); + literals.put(new ANTLRHashString("__const__", this), new Integer(LITERAL_const)); + literals.put(new ANTLRHashString("__imag__", this), new Integer(LITERAL___imag)); + literals.put(new ANTLRHashString("__inline", this), new Integer(LITERAL_inline)); + literals.put(new ANTLRHashString("__inline__", this), new Integer(LITERAL_inline)); + literals.put(new ANTLRHashString("__real__", this), new Integer(LITERAL___real)); + literals.put(new ANTLRHashString("__signed", this), new Integer(LITERAL_signed)); + literals.put(new ANTLRHashString("__signed__", this), new Integer(LITERAL_signed)); + literals.put(new ANTLRHashString("__typeof", this), new Integer(LITERAL_typeof)); + literals.put(new ANTLRHashString("__typeof__", this), new Integer(LITERAL_typeof)); + literals.put(new ANTLRHashString("__volatile", this), new 
Integer(LITERAL_volatile)); + literals.put(new ANTLRHashString("__volatile__", this), new Integer(LITERAL_volatile)); + } + + + LineObject lineObject = new LineObject(); + String originalSource = ""; + PreprocessorInfoChannel preprocessorInfoChannel = new PreprocessorInfoChannel(); + int tokenNumber = 0; + boolean countingTokens = true; + int deferredLineCount = 0; + List defines = new ArrayList(); + + public void setCountingTokens(boolean ct) + { + countingTokens = ct; + if ( countingTokens ) { + tokenNumber = 0; + } + else { + tokenNumber = 1; + } + } + + public void setOriginalSource(String src) + { + originalSource = src; + lineObject.setSource(src); + } + public void setSource(String src) + { + lineObject.setSource(src); + } + + public PreprocessorInfoChannel getPreprocessorInfoChannel() + { + return preprocessorInfoChannel; + } + + public void setPreprocessingDirective(String pre) + { + preprocessorInfoChannel.addLineForTokenNumber( pre, new Integer(tokenNumber) ); + } + + public void addDefine(String name, String value) + { + defines.add(new Define(name, value)); + } + + /** Returns a list of Define objects corresponding to the + preprocessor definitions seen during parsing. 
*/ + public List getDefines() { + return defines; + } + + protected Token makeToken(int t) + { + if ( t != Token.SKIP && countingTokens) { + tokenNumber++; + } + CToken tok = (CToken) super.makeToken(t); + tok.setLine(lineObject.line); + tok.setSource(lineObject.source); + tok.setTokenNumber(tokenNumber); + + lineObject.line += deferredLineCount; + deferredLineCount = 0; + return tok; + } + + public void deferredNewline() { + deferredLineCount++; + } + + public void newline() { + lineObject.newline(); + } + + + + + + +} +Whitespace + : ( ( ' ' | '\t' | '\014') + | "\r\n" { newline(); } + | ( '\n' | '\r' ) { newline(); } + ) { _ttype = Token.SKIP; } + ; + + +protected +Escape + : '\\' + ( options{warnWhenFollowAmbig=false;}: + ~('0'..'7' | 'x') + | ('0'..'3') ( options{warnWhenFollowAmbig=false;}: Digit )* + | ('4'..'7') ( options{warnWhenFollowAmbig=false;}: Digit )* + | 'x' ( options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F' )+ + ) + ; + +protected IntSuffix + : 'L' + | 'l' + | 'U' + | 'u' + | 'I' + | 'i' + | 'J' + | 'j' + ; +protected NumberSuffix + : + IntSuffix + | 'F' + | 'f' + ; + +Number + : ( ('-')? ( Digit )+ ( '.' | 'e' | 'E' ) )=> ('-')? ( Digit )+ + ( '.' ( Digit )* ( Exponent )? + | Exponent + ) + ( NumberSuffix + )* + + | ( "..." )=> "..." { _ttype = VARARGS; } + + | '.' { _ttype = DOT; } + ( ( Digit )+ ( Exponent )? + { _ttype = Number; } + ( NumberSuffix + )* + )? + + | '0' ( '0'..'7' )* + ( NumberSuffix + )* + + | ('-')? 
'1'..'9' ( Digit )* + ( NumberSuffix + )* + + | '0' ( 'x' | 'X' ) ( 'a'..'f' | 'A'..'F' | Digit )+ + ( IntSuffix + )* + ; + +IDMEAT + : + i:ID { + + if ( i.getType() == LITERAL___extension__ ) { + $setType(Token.SKIP); + } + else { + $setType(i.getType()); + } + + } + ; + +protected ID + options + { + testLiterals = true; + } + : ( 'a'..'z' | 'A'..'Z' | '_' | '$') + ( 'a'..'z' | 'A'..'Z' | '_' | '$' | '0'..'9' )* + ; + +WideCharLiteral + : + 'L' CharLiteral + { $setType(CharLiteral); } + ; + + + +WideStringLiteral + : + 'L' StringLiteral + { $setType(StringLiteral); } + ; + +StringLiteral + : + '"' + ( ('\\' ~('\n'))=> Escape + | ( '\r' { newline(); } + | '\n' { + newline(); + } + | '\\' '\n' { + newline(); + } + ) + | ~( '"' | '\r' | '\n' | '\\' ) + )* + '"' + ; + + + + diff --git a/src/main/antlr/com/jogamp/gluegen/cgram/GnuCTreeParser.g b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCTreeParser.g new file mode 100644 index 0000000..dbe2f98 --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/cgram/GnuCTreeParser.g @@ -0,0 +1,867 @@ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + Copyright (c) Non, Inc. 1998 -- All Rights Reserved + +PROJECT: C Compiler +MODULE: GnuCTreeParser +FILE: GnuCTreeParser.g + +AUTHOR: Monty Zukowski ([email protected]) April 28, 1998 + +DESCRIPTION: + + This tree grammar is for a Gnu C AST. No actions in it, + subclass to do something useful. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + +header { + package com.jogamp.gluegen.cgram; + + import java.io.*; + + import antlr.CommonAST; + import antlr.DumpASTVisitor; +} + + +class GnuCTreeParser extends TreeParser; + +options + { + importVocab = GNUC; + buildAST = false; + ASTLabelType = "TNode"; + + // Copied following options from java grammar. 
+ codeGenMakeSwitchThreshold = 2; + codeGenBitsetTestThreshold = 3; + } + + +{ + int traceDepth = 0; + public void reportError(RecognitionException ex) { + if ( ex != null) { + System.err.println("ANTLR Tree Parsing RecognitionException Error: " + ex.getClass().getName() + " " + ex ); + ex.printStackTrace(System.err); + } + } + public void reportError(NoViableAltException ex) { + System.err.println("ANTLR Tree Parsing NoViableAltException Error: " + ex.toString()); + TNode.printTree( ex.node ); + ex.printStackTrace(System.err); + } + public void reportError(MismatchedTokenException ex) { + if ( ex != null) { + TNode.printTree( ex.node ); + System.err.println("ANTLR Tree Parsing MismatchedTokenException Error: " + ex ); + ex.printStackTrace(System.err); + } + } + public void reportError(String s) { + System.err.println("ANTLR Error from String: " + s); + } + public void reportWarning(String s) { + System.err.println("ANTLR Warning from String: " + s); + } + protected void match(AST t, int ttype) throws MismatchedTokenException { + //System.out.println("match("+ttype+"); cursor is "+t); + super.match(t, ttype); + } + public void match(AST t, BitSet b) throws MismatchedTokenException { + //System.out.println("match("+b+"); cursor is "+t); + super.match(t, b); + } + protected void matchNot(AST t, int ttype) throws MismatchedTokenException { + //System.out.println("matchNot("+ttype+"); cursor is "+t); + super.matchNot(t, ttype); + } + public void traceIn(String rname, AST t) { + traceDepth += 1; + for (int x=0; x<traceDepth; x++) System.out.print(" "); + super.traceIn(rname, t); + } + public void traceOut(String rname, AST t) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + super.traceOut(rname, t); + traceDepth -= 1; + } + + +} + +translationUnit options { + defaultErrorHandler=false; +} + : ( externalList )? 
+ ; + +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + +externalList + : ( externalDef )+ + ; + + +externalDef + : declaration + | functionDef + | asm_expr + | SEMI + | typelessDeclaration + ; + +typelessDeclaration + : #(NTypeMissing initDeclList SEMI) + ; + + + +asm_expr + : #( "asm" ( "volatile" )? LCURLY expr RCURLY ( SEMI )+ ) + ; + + +declaration + : #( NDeclaration + declSpecifiers + ( + initDeclList + )? + ( SEMI )+ + ) + ; + + +declSpecifiers + : ( storageClassSpecifier + | typeQualifier + | typeSpecifier + )+ + ; + +storageClassSpecifier + : "auto" + | "register" + | "typedef" + | functionStorageClassSpecifier + ; + + +functionStorageClassSpecifier + : "extern" + | "static" + | "inline" + ; + + +typeQualifier + : "const" + | "volatile" + ; + + +typeSpecifier + : "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | "int8_t" + | "uint8_t" + | "int16_t" + | "uint16_t" + | "__int32" + | "int32_t" + | "wchar_t" + | "uint32_t" + | "__int64" + | "int64_t" + | "uint64_t" + | "ptrdiff_t" + | "intptr_t" + | "size_t" + | "uintptr_t" + | structSpecifier ( attributeDecl )* + | unionSpecifier ( attributeDecl )* + | enumSpecifier + | typedefName + | #("typeof" LPAREN + ( (typeName )=> typeName + | expr + ) + RPAREN + ) + | "__complex" + ; + + +typedefName + : #(NTypedefName ID) + ; + + +structSpecifier + : #( "struct" structOrUnionBody ) + ; + +unionSpecifier + : #( "union" structOrUnionBody ) + ; + +structOrUnionBody + : ( (ID LCURLY) => ID LCURLY + ( structDeclarationList )? + RCURLY + | LCURLY + ( structDeclarationList )? 
+ RCURLY + | ID + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + +structDeclarationList + : ( structDeclaration )+ + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + +structDeclaration + : specifierQualifierList structDeclaratorList + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + +specifierQualifierList + : ( + typeSpecifier + | typeQualifier + )+ + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + +structDeclaratorList + : ( structDeclarator )+ + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + +structDeclarator + : + #( NStructDeclarator + ( declarator )? + ( COLON expr )? + ( attributeDecl )* + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + + +enumSpecifier + : #( "enum" + ( ID )? + ( LCURLY enumList RCURLY )? + ) + ; + + +enumList + : ( enumerator )+ + ; + + +enumerator + : ID ( ASSIGN expr )? + ; + + + +attributeDecl: + #( "__attribute" (.)* ) + | #( NAsmAttribute LPAREN expr RPAREN ) + ; + +initDeclList + : ( initDecl )+ + ; + + +initDecl + { String declName = ""; } + : #( NInitDecl + declarator + ( attributeDecl )* + ( ASSIGN initializer + | COLON expr + )? 
+ ) + ; + + +pointerGroup + : #( NPointerGroup ( STAR ( typeQualifier )* )+ ) + ; + + + +idList + : ID ( COMMA ID )* + ; + + + +initializer + : #( NInitializer (initializerElementLabel)? expr ) + | lcurlyInitializer + ; + +initializerElementLabel + : #( NInitializerElementLabel + ( + ( LBRACKET expr RBRACKET (ASSIGN)? ) + | ID COLON + | DOT ID ASSIGN + ) + ) + ; + +lcurlyInitializer + : #( NLcurlyInitializer + initializerList + RCURLY + ) + ; + +initializerList + : ( initializer )* + ; + + +declarator + : #( NDeclarator + ( pointerGroup )? + + ( id:ID + | LPAREN declarator RPAREN + ) + + ( #( NParameterTypeList + ( + parameterTypeList + | (idList)? + ) + RPAREN + ) + | LBRACKET ( expr )? RBRACKET + )* + ) + ; + + + +parameterTypeList + : ( parameterDeclaration ( COMMA | SEMI )? )+ ( VARARGS )? + ; + + + +parameterDeclaration + : #( NParameterDeclaration + declSpecifiers + (declarator | nonemptyAbstractDeclarator)? + ) + ; + + +functionDef + : #( NFunctionDef + ( functionDeclSpecifiers)? + declarator + (declaration | VARARGS)* + compoundStatement + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + +functionDeclSpecifiers + : + ( functionStorageClassSpecifier + | typeQualifier + | typeSpecifier + )+ + ; + +declarationList + : + ( //ANTLR doesn't know that declarationList properly eats all the declarations + //so it warns about the ambiguity + options { + warnWhenFollowAmbig = false; + } : + localLabelDecl + | declaration + )+ + ; + +localLabelDecl + : #("__label__" (ID)+ ) + ; + + + +compoundStatement + : #( NCompoundStatement + ( declarationList + | functionDef + )* + ( statementList )? 
+ RCURLY + ) + ; + +statementList + : ( statement )+ + ; + +statement + : statementBody + ; + +statementBody + : SEMI // Empty statements + + | compoundStatement // Group of statements + + | #(NStatementExpr expr) // Expressions + +// Iteration statements: + + | #( "while" expr statement ) + | #( "do" statement expr ) + | #( "for" + expr expr expr + statement + ) + + +// Jump statements: + + | #( "goto" expr ) + | "continue" + | "break" + | #( "return" ( expr )? ) + + +// Labeled statements: + | #( NLabel ID (statement)? ) + | #( "case" expr (statement)? ) + | #( "default" (statement)? ) + + + +// Selection statements: + + | #( "if" + expr statement + ( "else" statement )? + ) + | #( "switch" expr statement ) + + + + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + + + + + +expr + : assignExpr + | conditionalExpr + | logicalOrExpr + | logicalAndExpr + | inclusiveOrExpr + | exclusiveOrExpr + | bitAndExpr + | equalityExpr + | relationalExpr + | shiftExpr + | additiveExpr + | multExpr + | castExpr + | unaryExpr + | postfixExpr + | primaryExpr + | commaExpr + | emptyExpr + | compoundStatementExpr + | initializer + | rangeExpr + | gnuAsmExpr + ; + +commaExpr + : #(NCommaExpr expr expr) + ; + +emptyExpr + : NEmptyExpression + ; + +compoundStatementExpr + : #(LPAREN compoundStatement RPAREN) + ; + +rangeExpr + : #(NRangeExpr expr VARARGS expr) + ; + +gnuAsmExpr + : #(NGnuAsmExpr + ("volatile")? + LPAREN stringConst + ( options { warnWhenFollowAmbig = false; }: + COLON (strOptExprPair ( COMMA strOptExprPair)* )? + ( options { warnWhenFollowAmbig = false; }: + COLON (strOptExprPair ( COMMA strOptExprPair)* )? + )? + )? + ( COLON stringConst ( COMMA stringConst)* )? + RPAREN + ) + ; + +strOptExprPair + : stringConst ( LPAREN expr RPAREN )? 
+ ; + +assignExpr + : #( ASSIGN expr expr) + | #( DIV_ASSIGN expr expr) + | #( PLUS_ASSIGN expr expr) + | #( MINUS_ASSIGN expr expr) + | #( STAR_ASSIGN expr expr) + | #( MOD_ASSIGN expr expr) + | #( RSHIFT_ASSIGN expr expr) + | #( LSHIFT_ASSIGN expr expr) + | #( BAND_ASSIGN expr expr) + | #( BOR_ASSIGN expr expr) + | #( BXOR_ASSIGN expr expr) + ; + + +conditionalExpr + : #( QUESTION expr (expr)? COLON expr ) + ; + + +logicalOrExpr + : #( LOR expr expr) + ; + + +logicalAndExpr + : #( LAND expr expr ) + ; + + +inclusiveOrExpr + : #( BOR expr expr ) + ; + + +exclusiveOrExpr + : #( BXOR expr expr ) + ; + + +bitAndExpr + : #( BAND expr expr ) + ; + + + +equalityExpr + : #( EQUAL expr expr) + | #( NOT_EQUAL expr expr) + ; + + +relationalExpr + : #( LT expr expr) + | #( LTE expr expr) + | #( GT expr expr) + | #( GTE expr expr) + ; + + + +shiftExpr + : #( LSHIFT expr expr) + | #( RSHIFT expr expr) + ; + + +additiveExpr + : #( PLUS expr expr) + | #( MINUS expr expr) + ; + + +multExpr + : #( STAR expr expr) + | #( DIV expr expr) + | #( MOD expr expr) + ; + + + +castExpr + : #( NCast typeName RPAREN expr) + ; + + +typeName + : specifierQualifierList (nonemptyAbstractDeclarator)? + ; + +nonemptyAbstractDeclarator + : #( NNonemptyAbstractDeclarator + ( pointerGroup + ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )* + + | ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? 
RBRACKET) + )+ + ) + ) + ; + + + +unaryExpr + : #( INC expr ) + | #( DEC expr ) + | #( NUnaryExpr unaryOperator expr) + | #( "sizeof" + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | expr + ) + ) + | #( "__alignof" + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | expr + ) + ) + ; +/* +exception +catch [RecognitionException ex] + { + reportError(ex); + System.out.println("PROBLEM TREE:\n" + + _t.toStringList()); + if (_t!=null) {_t = _t.getNextSibling();} + } +*/ + + unaryOperator + : BAND + | STAR + | PLUS + | MINUS + | BNOT + | LNOT + | LAND + | "__real" + | "__imag" + ; + + +postfixExpr + : #( NPostfixExpr + primaryExpr + ( PTR ID + | DOT ID + | #( NFunctionCallArgs (argExprList)? RPAREN ) + | LBRACKET expr RBRACKET + | INC + | DEC + )+ + ) + ; + + + +primaryExpr + : ID + | Number + | charConst + | stringConst + +// JTC: +// ID should catch the enumerator +// leaving it in gives ambiguous err +// | enumerator + + | #( NExpressionGroup expr ) + ; + + + +argExprList + : ( expr )+ + ; + + + +protected +charConst + : CharLiteral + ; + + +protected +stringConst + : #(NStringSeq (StringLiteral)+) + ; + + +protected +intConst + : IntOctalConst + | LongOctalConst + | UnsignedOctalConst + | IntIntConst + | LongIntConst + | UnsignedIntConst + | IntHexConst + | LongHexConst + | UnsignedHexConst + ; + + +protected +floatConst + : FloatDoubleConst + | DoubleDoubleConst + | LongDoubleConst + ; + + + + + + + + + diff --git a/src/main/antlr/com/jogamp/gluegen/cgram/HeaderParser.g b/src/main/antlr/com/jogamp/gluegen/cgram/HeaderParser.g new file mode 100644 index 0000000..75cf413 --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/cgram/HeaderParser.g @@ -0,0 +1,785 @@ +/* + * Copyright (c) 2003 Sun Microsystems, Inc. All Rights Reserved. + * Copyright (c) 2010 JogAmp Community. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistribution of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistribution in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * Neither the name of Sun Microsystems, Inc. or the names of + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * This software is provided "AS IS," without a warranty of any kind. ALL + * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN + * MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR + * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR + * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN OR + * ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR + * DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE + * DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, + * ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF + * SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + * + * You acknowledge that this software is not designed or intended for use + * in the design, construction, operation or maintenance of any nuclear + * facility. + * + * Sun gratefully acknowledges that this software was originally authored + * and developed by Kenneth Bradley Russell and Christopher John Kline. 
+ */ + +header { + package com.jogamp.gluegen.cgram; + + import java.io.*; + import java.util.*; + + import antlr.CommonAST; + import com.jogamp.gluegen.cgram.types.*; +} + +class HeaderParser extends GnuCTreeParser; +options { + k = 1; +} + +{ + /** Name assigned to a anonymous EnumType (e.g., "enum { ... }"). */ + public static final String ANONYMOUS_ENUM_NAME = "<anonymous>"; + + boolean debug = false; + + public boolean getDebug() { + return debug; + } + + public void setDebug(boolean debug) { + this.debug = debug; + } + + /** Set the dictionary mapping typedef names to types for this + HeaderParser. Must be done before parsing. */ + public void setTypedefDictionary(TypeDictionary dict) { + this.typedefDictionary = dict; + } + + /** Returns the typedef dictionary this HeaderParser uses. */ + public TypeDictionary getTypedefDictionary() { + return typedefDictionary; + } + + /** Set the dictionary mapping struct names (i.e., the "foo" in + "struct foo { ... };") to types for this HeaderParser. Must be done + before parsing. */ + public void setStructDictionary(TypeDictionary dict) { + this.structDictionary = dict; + } + + /** Returns the struct name dictionary this HeaderParser uses. */ + public TypeDictionary getStructDictionary() { + return structDictionary; + } + + /** Get the canonicalization map, which is a regular HashMap + mapping Type to Type and which is used for looking up the unique + instances of e.g. pointer-to-structure types that have been typedefed + and therefore have names. */ + public Map getCanonMap() { + return canonMap; + } + + /** Pre-define the list of EnumTypes for this HeaderParser. Must be + done before parsing. */ + public void setEnums(List<EnumType> enumTypes) { + // FIXME: Need to take the input set of EnumTypes, extract all + // the enumerates from each EnumType, and fill in the enumHash + // so that each enumerate maps to the enumType to which it + // belongs. 
+ throw new RuntimeException("setEnums is Unimplemented!"); + } + + /** Returns the EnumTypes this HeaderParser processed. */ + public List<EnumType> getEnums() { + return new ArrayList<EnumType>(enumHash.values()); + } + + /** Clears the list of functions this HeaderParser has parsed. + Useful when reusing the same HeaderParser for more than one + header file. */ + public void clearParsedFunctions() { + functions.clear(); + } + + /** Returns the list of FunctionSymbols this HeaderParser has parsed. */ + public List<FunctionSymbol> getParsedFunctions() { + return functions; + } + + private CompoundType lookupInStructDictionary(String typeName, + CompoundTypeKind kind, + int cvAttrs) { + CompoundType t = (CompoundType) structDictionary.get(typeName); + if (t == null) { + t = CompoundType.create(null, null, kind, cvAttrs); + t.setStructName(typeName); + structDictionary.put(typeName, t); + } + return t; + } + + private Type lookupInTypedefDictionary(String typeName) { + Type t = typedefDictionary.get(typeName); + if (t == null) { + throw new RuntimeException("Undefined reference to typedef name " + typeName); + } + return t; + } + + static class ParameterDeclaration { + private String id; + private Type type; + + ParameterDeclaration(String id, Type type) { + this.id = id; + this.type = type; + } + String id() { return id; } + Type type() { return type; } + } + + // A box for a Type. Allows type to be passed down to be modified by recursive rules. 
+ static class TypeBox { + private Type origType; + private Type type; + private boolean isTypedef; + + TypeBox(Type type) { + this(type, false); + } + + TypeBox(Type type, boolean isTypedef) { + this.origType = type; + this.isTypedef = isTypedef; + } + + Type type() { + if (type == null) { + return origType; + } + return type; + } + void setType(Type type) { + this.type = type; + } + void reset() { + type = null; + } + + boolean isTypedef() { return isTypedef; } + + // for easier debugging + public String toString() { + String tStr = "Type=NULL_REF"; + if (type == origType) { + tStr = "Type=ORIG_TYPE"; + } else if (type != null) { + tStr = "Type: name=\"" + type.getCVAttributesString() + " " + + type.getName() + "\"; signature=\"" + type + "\"; class " + + type.getClass().getName(); + } + String oStr = "OrigType=NULL_REF"; + if (origType != null) { + oStr = "OrigType: name=\"" + origType.getCVAttributesString() + " " + + origType.getName() + "\"; signature=\"" + origType + "\"; class " + + origType.getClass().getName(); + } + return "<["+tStr + "] [" + oStr + "] " + " isTypedef=" + isTypedef+">"; + } + } + + private String getTypeString(Type t) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + sb.append(t); + sb.append(", size: "); + if(null!=t) { + SizeThunk st = t.getSize(); + if(null!=st) { + sb.append(st.getClass().getName()); + } else { + sb.append("undef"); + } + } + sb.append("]"); + return sb.toString(); + } + + private void debugPrintln(String msg) { + if(debug) { + System.err.println(msg); + } + } + + private void debugPrint(String msg) { + if(debug) { + System.err.print(msg); + } + } + + private boolean doDeclaration; // Used to only process function typedefs + private String declId; + private List parameters; + private TypeDictionary typedefDictionary; + private TypeDictionary structDictionary; + private List<FunctionSymbol> functions = new ArrayList<FunctionSymbol>(); + // hash from name of an enumerated value to the EnumType to which it 
belongs + private HashMap<String, EnumType> enumHash = new HashMap<String, EnumType>(); + + // Storage class specifiers + private static final int AUTO = 1 << 0; + private static final int REGISTER = 1 << 1; + private static final int TYPEDEF = 1 << 2; + // Function storage class specifiers + private static final int EXTERN = 1 << 3; + private static final int STATIC = 1 << 4; + private static final int INLINE = 1 << 5; + // Type qualifiers + private static final int CONST = 1 << 6; + private static final int VOLATILE = 1 << 7; + private static final int SIGNED = 1 << 8; + private static final int UNSIGNED = 1 << 9; + + private void initDeclaration() { + doDeclaration = false; + declId = null; + } + + private void doDeclaration() { + doDeclaration = true; + } + + private void processDeclaration(Type returnType) { + if (doDeclaration) { + FunctionSymbol sym = new FunctionSymbol(declId, new FunctionType(null, null, returnType, 0)); + if (parameters != null) { // handle funcs w/ empty parameter lists (e.g., "foo()") + for (Iterator iter = parameters.iterator(); iter.hasNext(); ) { + ParameterDeclaration pd = (ParameterDeclaration) iter.next(); + sym.addArgument(pd.type(), pd.id()); + } + } + functions.add(sym); + } + } + + private int attrs2CVAttrs(int attrs) { + int cvAttrs = 0; + if ((attrs & CONST) != 0) { + cvAttrs |= CVAttributes.CONST; + } + if ((attrs & VOLATILE) != 0) { + cvAttrs |= CVAttributes.VOLATILE; + } + return cvAttrs; + } + + /** Helper routine which handles creating a pointer or array type + for [] expressions */ + private void handleArrayExpr(TypeBox tb, AST t) { + if (t != null) { + try { + int len = parseIntConstExpr(t); + tb.setType(canonicalize(new ArrayType(tb.type(), SizeThunk.mul(SizeThunk.constant(len), tb.type().getSize()), len, 0))); + return; + } catch (RecognitionException e) { + // Fall through + } + } + tb.setType(canonicalize(new PointerType(SizeThunk.POINTER, + tb.type(), + 0))); + } + + private int parseIntConstExpr(AST t) throws 
RecognitionException { + return intConstExpr(t); + } + + /** Utility function: creates a new EnumType with the given name, or + returns an existing one if it has already been created. */ + private EnumType getEnumType(String enumTypeName) { + EnumType enumType = null; + Iterator<EnumType> it = enumHash.values().iterator(); + while (it.hasNext()) { + EnumType potentialMatch = it.next(); + if (potentialMatch.getName().equals(enumTypeName)) { + enumType = potentialMatch; + break; + } + } + + if (enumType == null) { + // This isn't quite correct. In theory the enum should expand to + // the size of the largest element, so if there were a long long + // entry the enum should expand to e.g. int64. However, using + // "long" here (which is what used to be the case) was + // definitely incorrect and caused problems. + enumType = new EnumType(enumTypeName, SizeThunk.INT32); + } + + return enumType; + } + + // Map used to canonicalize types. For example, we may typedef + // struct foo { ... } *pfoo; subsequent references to struct foo* should + // point to the same PointerType object that had its name set to "pfoo". + private Map canonMap = new HashMap(); + private Type canonicalize(Type t) { + Type res = (Type) canonMap.get(t); + if (res != null) { + return res; + } + canonMap.put(t, t); + return t; + } +} + +declarator[TypeBox tb] returns [String s] { + initDeclaration(); + s = null; + List params = null; + String funcPointerName = null; + TypeBox dummyTypeBox = null; +} + : #( NDeclarator + ( pointerGroup[tb] )? + + ( id:ID { s = id.getText(); } + | LPAREN funcPointerName = declarator[dummyTypeBox] RPAREN + ) + + ( #( NParameterTypeList + ( + params = parameterTypeList + | (idList)? + ) + RPAREN + ) { + if (id != null) { + declId = id.getText(); + parameters = params; // FIXME: Ken, why are we setting this class member here? 
+ doDeclaration(); + } else if ( funcPointerName != null ) { + /* TypeBox becomes function pointer in this case */ + FunctionType ft = new FunctionType(null, null, tb.type(), 0); + if (params == null) { + // If the function pointer has no declared parameters, it's a + // void function. I'm not sure if the parameter name is + // ever referenced anywhere when the type is VoidType, so + // just in case I'll set it to a comment string so it will + // still compile if written out to code anywhere. + ft.addArgument(new VoidType(0), "/*unnamed-void*/"); + } else { + for (Iterator iter = params.iterator(); iter.hasNext(); ) { + ParameterDeclaration pd = (ParameterDeclaration) iter.next(); + ft.addArgument(pd.type(), pd.id()); + } + } + tb.setType(canonicalize(new PointerType(SizeThunk.POINTER, + ft, + 0))); + s = funcPointerName; + } + } + | LBRACKET ( e:expr )? RBRACKET { handleArrayExpr(tb, e); } + )* + ) + ; + +typelessDeclaration { + TypeBox tb = null; +} + : #(NTypeMissing initDeclList[tb] SEMI) + ; + +declaration { + TypeBox tb = null; +} + : #( NDeclaration + tb = declSpecifiers + ( + initDeclList[tb] + )? + ( SEMI )+ + ) { processDeclaration(tb.type()); } + ; + +parameterTypeList returns [List l] { l = new ArrayList(); ParameterDeclaration decl = null; } + : ( decl = parameterDeclaration { if (decl != null) l.add(decl); } ( COMMA | SEMI )? )+ ( VARARGS )? + ; + +parameterDeclaration returns [ParameterDeclaration pd] { + Type t = null; + String decl = null; + pd = null; + TypeBox tb = null; +} + : #( NParameterDeclaration + tb = declSpecifiers + (decl = declarator[tb] | nonemptyAbstractDeclarator[tb])? + ) { pd = new ParameterDeclaration(decl, tb.type()); } + ; + +functionDef { + TypeBox tb = null; +} + : #( NFunctionDef + ( functionDeclSpecifiers)? 
+ declarator[tb] + (declaration | VARARGS)* + compoundStatement + ) + ; + +declSpecifiers returns [TypeBox tb] { + tb = null; + Type t = null; + int x = 0; + int y = 0; +} + : ( y = storageClassSpecifier { x |= y; } + | y = typeQualifier { x |= y; } + | t = typeSpecifier[x] + )+ +{ + if (t == null && + (x & (SIGNED | UNSIGNED)) != 0) { + t = new IntType("int", SizeThunk.INTxx, ((x & UNSIGNED) != 0), attrs2CVAttrs(x)); + } + tb = new TypeBox(t, ((x & TYPEDEF) != 0)); +} + ; + +storageClassSpecifier returns [int x] { x = 0; } + : "auto" { x |= AUTO; } + | "register" { x |= REGISTER; } + | "typedef" { x |= TYPEDEF; } + | x = functionStorageClassSpecifier + ; + + +functionStorageClassSpecifier returns [int x] { x = 0; } + : "extern" { x |= EXTERN; } + | "static" { x |= STATIC; } + | "inline" { x |= INLINE; } + ; + + +typeQualifier returns [int x] { x = 0; } + : "const" { x |= CONST; } + | "volatile" { x |= VOLATILE; } + | "signed" { x |= SIGNED; } + | "unsigned" { x |= UNSIGNED; } + ; + +typeSpecifier[int attributes] returns [Type t] { + t = null; + int cvAttrs = attrs2CVAttrs(attributes); + boolean unsigned = ((attributes & UNSIGNED) != 0); +} + : "void" { t = new VoidType(cvAttrs); } + | "char" { t = new IntType("char" , SizeThunk.INT8, unsigned, cvAttrs); } + | "short" { t = new IntType("short", SizeThunk.INT16, unsigned, cvAttrs); } + | "int" { t = new IntType("int" , SizeThunk.INTxx, unsigned, cvAttrs); } + | "long" { t = new IntType("long" , SizeThunk.LONG, unsigned, cvAttrs); } + | "float" { t = new FloatType("float", SizeThunk.FLOAT, cvAttrs); } + | "double" { t = new DoubleType("double", SizeThunk.DOUBLE, cvAttrs); } + | "__int32" { t = new IntType("__int32", SizeThunk.INT32, unsigned, cvAttrs); } + | "__int64" { t = new IntType("__int64", SizeThunk.INT64, unsigned, cvAttrs); } + | "int8_t" { t = new IntType("int8_t", SizeThunk.INT8, false, cvAttrs); /* TS: always signed */ } + | "uint8_t" { t = new IntType("uint8_t", SizeThunk.INT8, true, cvAttrs); /* TS: 
always unsigned */ } + | "int16_t" { t = new IntType("int16_t", SizeThunk.INT16, false, cvAttrs); /* TS: always signed */ } + | "uint16_t" { t = new IntType("uint16_t", SizeThunk.INT16, true, cvAttrs); /* TS: always unsigned */ } + | "int32_t" { t = new IntType("int32_t", SizeThunk.INT32, false, cvAttrs); /* TS: always signed */ } + | "wchar_t" { t = new IntType("wchar_t", SizeThunk.INT32, false, cvAttrs); /* TS: always signed */ } + | "uint32_t" { t = new IntType("uint32_t", SizeThunk.INT32, true, cvAttrs, true); /* TS: always unsigned */ } + | "int64_t" { t = new IntType("int64_t", SizeThunk.INT64, false, cvAttrs); /* TS: always signed */ } + | "uint64_t" { t = new IntType("uint64_t", SizeThunk.INT64, true, cvAttrs, true); /* TS: always unsigned */ } + | "ptrdiff_t" { t = new IntType("ptrdiff_t", SizeThunk.POINTER, false, cvAttrs); /* TS: always signed */ } + | "intptr_t" { t = new IntType("intptr_t", SizeThunk.POINTER, false, cvAttrs); /* TS: always signed */ } + | "size_t" { t = new IntType("size_t", SizeThunk.POINTER, true, cvAttrs, true); /* TS: always unsigned */ } + | "uintptr_t" { t = new IntType("uintptr_t", SizeThunk.POINTER, true, cvAttrs, true); /* TS: always unsigned */ } + | t = structSpecifier[cvAttrs] ( attributeDecl )* + | t = unionSpecifier [cvAttrs] ( attributeDecl )* + | t = enumSpecifier [cvAttrs] + | t = typedefName [cvAttrs] + | #("typeof" LPAREN + ( (typeName )=> typeName + | expr + ) + RPAREN + ) + | "__complex" + ; + +typedefName[int cvAttrs] returns [Type t] { t = null; } + : #(NTypedefName id : ID) + { + Type tdict = lookupInTypedefDictionary(id.getText()); + t = canonicalize(tdict.getCVVariant(cvAttrs)); + debugPrintln("Adding typedef canon : [" + id.getText() + "] -> [" + tdict + "] -> "+getTypeString(t)); + } + ; + +structSpecifier[int cvAttrs] returns [Type t] { t = null; } + : #( "struct" t = structOrUnionBody[CompoundTypeKind.STRUCT, cvAttrs] ) + ; + +unionSpecifier[int cvAttrs] returns [Type t] { t = null; } + : #( "union" t = 
structOrUnionBody[CompoundTypeKind.UNION, cvAttrs] ) + ; + +structOrUnionBody[CompoundTypeKind kind, int cvAttrs] returns [CompoundType t] { + t = null; +} + : ( (ID LCURLY) => id:ID LCURLY { + t = (CompoundType) canonicalize(lookupInStructDictionary(id.getText(), kind, cvAttrs)); + } ( structDeclarationList[t] )? + RCURLY { t.setBodyParsed(); } + | LCURLY { t = CompoundType.create(null, null, kind, cvAttrs); } + ( structDeclarationList[t] )? + RCURLY { t.setBodyParsed(); } + | id2:ID { t = (CompoundType) canonicalize(lookupInStructDictionary(id2.getText(), kind, cvAttrs)); } + ) + ; + +structDeclarationList[CompoundType t] + : ( structDeclaration[t] )+ + ; + +// Handles one member declaration of a struct/union body. When none of its +// declarators added a field and the declared type is a union, the union +// itself is added to containingType as an anonymous field. +structDeclaration[CompoundType containingType] { + Type t = null; + boolean addedAny = false; +} + : t = specifierQualifierList addedAny = structDeclaratorList[containingType, t] { + if (!addedAny) { + if (t != null) { + // NOTE(review): asCompound() presumably yields null for a non-compound + // type (e.g. the unnamed bit-field "int : 3;") - confirm; the null test + // keeps such members from failing in isUnion() + CompoundType ct = t.asCompound(); + if (ct != null && ct.isUnion()) { + // Anonymous union + containingType.addField(new Field(null, t, null)); + } + } + } + } + ; + +specifierQualifierList returns [Type t] { + t = null; int x = 0; int y = 0; +} + : ( + t = typeSpecifier[x] + | y = typeQualifier { x |= y; } + )+ { + if (t == null && + (x & (SIGNED | UNSIGNED)) != 0) { + t = new IntType("int", SizeThunk.INTxx, ((x & UNSIGNED) != 0), attrs2CVAttrs(x)); + } +} + ; + +// Returns true if at least one declarator of the list added a field. The +// flag is accumulated with |= so that a later declarator which adds nothing +// does not reset it. +structDeclaratorList[CompoundType containingType, Type t] returns [boolean addedAny] { + addedAny = false; + boolean y = false; +} + : ( y = structDeclarator[containingType, t] { addedAny |= y; })+ + ; + +structDeclarator[CompoundType containingType, Type t] returns [boolean addedAny] { + addedAny = false; + String s = null; + TypeBox tb = new TypeBox(t); +} + : + #( NStructDeclarator + ( s = declarator[tb] { containingType.addField(new Field(s, tb.type(), null)); addedAny = true; } )? + ( COLON expr { /* FIXME: bit types not handled yet */ } ) ? 
+ ( attributeDecl )* + ) + ; + +// FIXME: this will not correctly set the name of the enumeration when +// encountering a declaration like this: +// +// typedef enum { } enumName; +// +// In this case calling getName() on the EnumType return value will +// incorrectly return HeaderParser.ANONYMOUS_ENUM_NAME instead of +// "enumName" +// +// I haven't implemented it yet because I'm not sure how to get the +// "enumName" *before* executing the enumList rule. +// +// Three forms are matched: a named enum with a body, an anonymous enum +// with a body, and a bare reference to a named enum ("enum foo"). +enumSpecifier [int cvAttrs] returns [Type t] { + t = null; +} + : #( "enum" + ( ( ID LCURLY )=> i:ID LCURLY enumList[(EnumType)(t = getEnumType(i.getText()))] RCURLY + | LCURLY enumList[(EnumType)(t = getEnumType(ANONYMOUS_ENUM_NAME))] RCURLY + // Bare reference: this ID needs its own label, because label i is only + // bound when the first alternative matches and is null here. + | i2:ID { t = getEnumType(i2.getText()); } + ) + ) + ; + +enumList[EnumType enumeration] { + long defaultEnumerantValue = 0; +} + : ( defaultEnumerantValue = enumerator[enumeration, defaultEnumerantValue] )+ + ; + +enumerator[EnumType enumeration, long defaultValue] returns [long newDefaultValue] { + newDefaultValue = defaultValue; +} + : eName:ID ( ASSIGN eVal:expr )? 
{ + long value = 0; + if (eVal != null) { + String vTxt = eVal.getAllChildrenText(); + if (enumHash.containsKey(vTxt)) { + EnumType oldEnumType = enumHash.get(vTxt); + value = oldEnumType.getEnumValue(vTxt); + } else { + try { + value = Long.decode(vTxt).longValue(); + } catch (NumberFormatException e) { + System.err.println("NumberFormatException: ID[" + eName.getText() + "], VALUE=[" + vTxt + "]"); + throw e; + } + } + } else { + value = defaultValue; + } + + newDefaultValue = value+1; + String eTxt = eName.getText(); + if (enumHash.containsKey(eTxt)) { + EnumType oldEnumType = enumHash.get(eTxt); + long oldValue = oldEnumType.getEnumValue(eTxt); + System.err.println("WARNING: redefinition of enumerated value '" + eTxt + "';" + + " existing definition is in enumeration '" + oldEnumType.getName() + + "' with value " + oldValue + " and new definition is in enumeration '" + + enumeration.getName() + "' with value " + value); + // remove old definition + oldEnumType.removeEnumerate(eTxt); + } + // insert new definition + enumeration.addEnum(eTxt, value); + enumHash.put(eTxt, enumeration); + debugPrintln("ENUM [" + enumeration.getName() + "]: " + eTxt + " = " + enumeration.getEnumValue(eTxt) + + " (new default = " + newDefaultValue + ")"); + } + ; + +initDeclList[TypeBox tb] + : ( initDecl[tb] )+ + ; + +initDecl[TypeBox tb] { + String declName = null; +} + : #( NInitDecl + declName = declarator[tb] { + debugPrintln("GOT declName: " + declName + " TB=" + tb); + } + ( attributeDecl )* + ( ASSIGN initializer + | COLON expr + )? + ) +{ + if ((declName != null) && (tb != null) && tb.isTypedef()) { + Type t = tb.type(); + debugPrint("Adding typedef mapping: [" + declName + "] -> "+getTypeString(t)); + if (!t.hasTypedefName()) { + t.setName(declName); + debugPrint(" - declName -> "+getTypeString(t)); + } else { + // copy type to preserve declName ! 
+ t = (Type) t.clone(); + t.setName(declName); + debugPrint(" - copy -> "+getTypeString(t)); + } + t = canonicalize(t); + debugPrintln(" - canon -> "+getTypeString(t)); + typedefDictionary.put(declName, t); + // Clear out PointerGroup effects in case another typedef variant follows + tb.reset(); + } +} + ; + +pointerGroup[TypeBox tb] { int x = 0; int y = 0; } + : #( NPointerGroup ( STAR { x = 0; y = 0; } ( y = typeQualifier { x |= y; } )* + { + debugPrintln("IN PTR GROUP: TB=" + tb); + if (tb != null) { + tb.setType(canonicalize(new PointerType(SizeThunk.POINTER, + tb.type(), + attrs2CVAttrs(x)))); + } + } + )+ ) + ; + + +functionDeclSpecifiers + : + ( functionStorageClassSpecifier + | typeQualifier + | typeSpecifier[0] + )+ + ; + +typeName { + TypeBox tb = null; +} + : specifierQualifierList (nonemptyAbstractDeclarator[tb])? + ; + + +/* FIXME: the handling of types in this rule has not been well thought + out and is known to be incomplete. Currently it is only used to handle + pointerGroups for unnamed parameters. */ +nonemptyAbstractDeclarator[TypeBox tb] + : #( NNonemptyAbstractDeclarator + ( pointerGroup[tb] + ( (LPAREN + ( nonemptyAbstractDeclarator[tb] + | parameterTypeList + )? + RPAREN) + | (LBRACKET (e1:expr)? RBRACKET) { handleArrayExpr(tb, e1); } + )* + + | ( (LPAREN + ( nonemptyAbstractDeclarator[tb] + | parameterTypeList + )? + RPAREN) + | (LBRACKET (e2:expr)? RBRACKET) { handleArrayExpr(tb, e2); } + )+ + ) + ) + ; + +/* Helper routine for parsing expressions which evaluate to integer + constants. Can be made more complicated as necessary. 
*/ +// Integer.decode is used so that hexadecimal (0x...) and octal (leading 0) +// spellings are read with their C meaning; a literal it cannot parse (for +// example one carrying a U/L suffix) still raises NumberFormatException. +intConstExpr returns [int i] { i = -1; } + : n:Number { return Integer.decode(n.getText()).intValue(); } + ; diff --git a/src/main/antlr/com/jogamp/gluegen/cgram/StdCParser.g b/src/main/antlr/com/jogamp/gluegen/cgram/StdCParser.g new file mode 100644 index 0000000..7b34656 --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/cgram/StdCParser.g @@ -0,0 +1,1401 @@ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + Copyright (c) Non, Inc. 1997 -- All Rights Reserved + +PROJECT: C Compiler +MODULE: Parser +FILE: stdc.g + +AUTHOR: John D. Mitchell ([email protected]), Jul 12, 1997 + +REVISION HISTORY: + + Name Date Description + ---- ---- ----------- + JDM 97.07.12 Initial version. + JTC 97.11.18 Declaration vs declarator & misc. hacking. + JDM 97.11.20 Fixed: declaration vs funcDef, + parenthesized expressions, + declarator iteration, + varargs recognition, + empty source file recognition, + and some typos. + + +DESCRIPTION: + + This grammar supports the Standard C language. + + Note clearly that this grammar does *NOT* deal with + preprocessor functionality (including things like trigraphs) + Nor does this grammar deal with multi-byte characters nor strings + containing multi-byte characters [these constructs are "exercises + for the reader" as it were :-)]. + + Please refer to the ISO/ANSI C Language Standard if you believe + this grammar to be in error. Please cite chapter and verse in any + correspondence to the author to back up your claim. + +TODO: + + - typedefName is commented out, needs a symbol table to resolve + ambiguity. + + - trees + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + +header { + package com.jogamp.gluegen.cgram; + + import java.io.*; + + import antlr.CommonAST; + import antlr.DumpASTVisitor; +} + + +class StdCParser extends Parser; + +options + { + k = 2; + exportVocab = STDC; + buildAST = true; + ASTLabelType = "TNode"; + + // Copied following options from java grammar. 
+ codeGenMakeSwitchThreshold = 2; + codeGenBitsetTestThreshold = 3; + } + + +{ + // Support C++-style single-line comments? + public static boolean CPPComments = true; + + // access to symbol table + public CSymbolTable symbolTable = new CSymbolTable(); + + // source for names to unnamed scopes + protected int unnamedScopeCounter = 0; + + public boolean isTypedefName(String name) { + boolean returnValue = false; + TNode node = symbolTable.lookupNameInCurrentScope(name); + for (; node != null; node = (TNode) node.getNextSibling() ) { + if(node.getType() == LITERAL_typedef) { + returnValue = true; + break; + } + } + return returnValue; + } + + + public String getAScopeName() { + return "" + (unnamedScopeCounter++); + } + + public void pushScope(String scopeName) { + symbolTable.pushScope(scopeName); + } + + public void popScope() { + symbolTable.popScope(); + } + + int traceDepth = 0; + public void reportError(RecognitionException ex) { + try { + System.err.println("ANTLR Parsing Error: "+ex + " token name:" + tokenNames[LA(1)]); + ex.printStackTrace(System.err); + } + catch (TokenStreamException e) { + System.err.println("ANTLR Parsing Error: "+ex); + ex.printStackTrace(System.err); + } + } + public void reportError(String s) { + System.err.println("ANTLR Parsing Error from String: " + s); + } + public void reportWarning(String s) { + System.err.println("ANTLR Parsing Warning from String: " + s); + } + public void match(int t) throws MismatchedTokenException { + boolean debugging = false; + + if ( debugging ) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("Match("+tokenNames[t]+") with LA(1)="+ + tokenNames[LA(1)] + ((inputState.guessing>0)?" [inputState.guessing "+ inputState.guessing + "]":"")); + } + catch (TokenStreamException e) { + System.out.println("Match("+tokenNames[t]+") " + ((inputState.guessing>0)?" 
[inputState.guessing "+ inputState.guessing + "]":"")); + + } + + } + try { + if ( LA(1)!=t ) { + if ( debugging ){ + for (int x=0; x<traceDepth; x++) System.out.print(" "); + System.out.println("token mismatch: "+tokenNames[LA(1)] + + "!="+tokenNames[t]); + } + throw new MismatchedTokenException(tokenNames, LT(1), t, false, getFilename()); + + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } + } + catch (TokenStreamException e) { + } + + } + public void traceIn(String rname) { + traceDepth += 1; + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("> "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") " + LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + } + public void traceOut(String rname) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("< "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") "+LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + traceDepth -= 1; + } + +} + + + +translationUnit + : externalList + + | /* Empty source files are *not* allowed. */ + { + System.err.println ( "Empty source file!" ); + } + ; + + +externalList + : ( externalDef )+ + ; + + +externalDef + : ( "typedef" | declaration )=> declaration + | functionDef + | asm_expr + ; + + +asm_expr + : "asm"^ + ("volatile")? LCURLY! expr RCURLY! SEMI! + ; + + +declaration + { AST ds1 = null; } + : ds:declSpecifiers { ds1 = astFactory.dupList(#ds); } + ( + initDeclList[ds1] + )? + SEMI! 
+ { ## = #( #[NDeclaration], ##); } + + ; + + +declSpecifiers + { int specCount=0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + s:storageClassSpecifier + | typeQualifier + | ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + )+ + ; + +storageClassSpecifier + : "auto" + | "register" + | "typedef" + | functionStorageClassSpecifier + ; + + +functionStorageClassSpecifier + : "extern" + | "static" + ; + + +typeQualifier + : "const" + | "volatile" + ; + +typeSpecifier [int specCount] returns [int retSpecCount] + { retSpecCount = specCount + 1; } + : + ( "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | "int8_t" + | "uint8_t" + | "int16_t" + | "uint16_t" + | "__int32" + | "int32_t" + | "wchar_t" + | "uint32_t" + | "__int64" + | "int64_t" + | "uint64_t" + | "ptrdiff_t" + | "intptr_t" + | "size_t" + | "uintptr_t" + | structOrUnionSpecifier + | enumSpecifier + | { specCount == 0 }? typedefName + ) + ; + + +typedefName + : { isTypedefName ( LT(1).getText() ) }? + i:ID { ## = #(#[NTypedefName], #i); } + ; + +structOrUnionSpecifier + { String scopeName; } + : sou:structOrUnion! + ( ( ID LCURLY )=> i:ID l:LCURLY + { + scopeName = #sou.getText() + " " + #i.getText(); + #l.setText(scopeName); + pushScope(scopeName); + } + structDeclarationList + { popScope();} + RCURLY! + | l1:LCURLY + { + scopeName = getAScopeName(); + #l1.setText(scopeName); + pushScope(scopeName); + } + structDeclarationList + { popScope(); } + RCURLY! + | ID + ) + { + ## = #( #sou, ## ); + } + ; + + +structOrUnion + : "struct" + | "union" + ; + + +structDeclarationList + : ( structDeclaration )+ + ; + + +structDeclaration + : specifierQualifierList structDeclaratorList ( SEMI! 
)+ + ; + + +specifierQualifierList + { int specCount = 0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + | typeQualifier + )+ + ; + + +structDeclaratorList + : structDeclarator ( COMMA! structDeclarator )* + ; + + +structDeclarator + : + ( COLON constExpr + | declarator[false] ( COLON constExpr )? + ) + { ## = #( #[NStructDeclarator], ##); } + ; + + +enumSpecifier + : "enum"^ + ( ( ID LCURLY )=> i:ID LCURLY enumList[i.getText()] RCURLY! + | LCURLY enumList["anonymous"] RCURLY! + | ID + ) + ; + + +enumList[String enumName] + : enumerator[enumName] ( COMMA! enumerator[enumName] )* + ; + +enumerator[String enumName] + : i:ID { symbolTable.add( i.getText(), + #( null, + #[LITERAL_enum, "enum"], + #[ ID, enumName] + ) + ); + } + (ASSIGN constExpr)? + ; + + +initDeclList[AST declarationSpecifiers] + : initDecl[declarationSpecifiers] + ( COMMA! initDecl[declarationSpecifiers] )* + ; + + +initDecl[AST declarationSpecifiers] + { String declName = ""; } + : declName = d:declarator[false] + { AST ds1, d1; + ds1 = astFactory.dupList(declarationSpecifiers); + d1 = astFactory.dupList(#d); + symbolTable.add(declName, #(null, ds1, d1) ); + } + ( ASSIGN initializer + | COLON expr + )? + { ## = #( #[NInitDecl], ## ); } + + ; + +pointerGroup + : ( STAR ( typeQualifier )* )+ { ## = #( #[NPointerGroup], ##); } + ; + + + +idList + : ID ( COMMA! ID )* + ; + + +initializer + : ( assignExpr + | LCURLY initializerList ( COMMA! )? RCURLY! + ) + { ## = #( #[NInitializer], ## ); } + ; + + +initializerList + : initializer ( COMMA! initializer )* + ; + + +declarator[boolean isFunctionDefinition] returns [String declName] + { declName = ""; } + : + ( pointerGroup )? + + ( id:ID { declName = id.getText(); } + | LPAREN declName = declarator[false] RPAREN + ) + + ( ! 
LPAREN + { + if (isFunctionDefinition) { + pushScope(declName); + } + else { + pushScope("!"+declName); + } + } + ( + (declSpecifiers)=> p:parameterTypeList + { + ## = #( null, ##, #( #[NParameterTypeList], #p ) ); + } + + | (i:idList)? + { + ## = #( null, ##, #( #[NParameterTypeList], #i ) ); + } + ) + { + popScope(); + } + RPAREN + | LBRACKET ( constExpr )? RBRACKET + )* + { ## = #( #[NDeclarator], ## ); } + ; + +parameterTypeList + : parameterDeclaration + ( options { + warnWhenFollowAmbig = false; + } : + COMMA! + parameterDeclaration + )* + ( COMMA! + VARARGS + )? + ; + + +parameterDeclaration + { String declName; } + : ds:declSpecifiers + ( ( declarator[false] )=> declName = d:declarator[false] + { + AST d2, ds2; + d2 = astFactory.dupList(#d); + ds2 = astFactory.dupList(#ds); + symbolTable.add(declName, #(null, ds2, d2)); + } + | nonemptyAbstractDeclarator + )? + { + ## = #( #[NParameterDeclaration], ## ); + } + ; + +/* JTC: + * This handles both new and old style functions. + * see declarator rule to see differences in parameters + * and here (declaration SEMI)* is the param type decls for the + * old style. may want to do some checking to check for illegal + * combinations (but I assume all parsed code will be legal?) + */ + +functionDef + { String declName; } + : ( (functionDeclSpecifiers)=> ds:functionDeclSpecifiers + | //epsilon + ) + declName = d:declarator[true] + { + AST d2, ds2; + d2 = astFactory.dupList(#d); + ds2 = astFactory.dupList(#ds); + symbolTable.add(declName, #(null, ds2, d2)); + pushScope(declName); + } + ( declaration )* (VARARGS)? ( SEMI! 
)* + { popScope(); } + compoundStatement[declName] + { ## = #( #[NFunctionDef], ## );} + ; + +functionDeclSpecifiers + { int specCount = 0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + functionStorageClassSpecifier + | typeQualifier + | ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + )+ + ; + +declarationList + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + ( declarationPredictor )=> declaration + )+ + ; + +declarationPredictor + : (options { //only want to look at declaration if I don't see typedef + warnWhenFollowAmbig = false; + }: + "typedef" + | declaration + ) + ; + + +compoundStatement[String scopeName] + : LCURLY! + { + pushScope(scopeName); + } + ( ( declarationPredictor)=> declarationList )? + ( statementList )? + { popScope(); } + RCURLY! + { ## = #( #[NCompoundStatement, scopeName], ##); } + ; + + +statementList + : ( statement )+ + ; +statement + : SEMI // Empty statements + + | compoundStatement[getAScopeName()] // Group of statements + + | expr SEMI! { ## = #( #[NStatementExpr], ## ); } // Expressions + +// Iteration statements: + + | "while"^ LPAREN! expr RPAREN! statement + | "do"^ statement "while"! LPAREN! expr RPAREN! SEMI! + |! "for" + LPAREN ( e1:expr )? SEMI ( e2:expr )? SEMI ( e3:expr )? RPAREN + s:statement + { + if ( #e1 == null) { #e1 = (TNode) #[ NEmptyExpression ]; } + if ( #e2 == null) { #e2 = (TNode) #[ NEmptyExpression ]; } + if ( #e3 == null) { #e3 = (TNode) #[ NEmptyExpression ]; } + ## = #( #[LITERAL_for, "for"], #e1, #e2, #e3, #s ); + } + + +// Jump statements: + + | "goto"^ ID SEMI! + | "continue" SEMI! + | "break" SEMI! + | "return"^ ( expr )? SEMI! + + +// Labeled statements: + | ID COLON! (options {warnWhenFollowAmbig=false;}:statement)? { ## = #( #[NLabel], ## ); } + | "case"^ constExpr COLON! 
statement + | "default"^ COLON! statement + + + +// Selection statements: + + | "if"^ + LPAREN! expr RPAREN! statement + ( //standard if-else ambiguity + options { + warnWhenFollowAmbig = false; + } : + "else" statement )? + | "switch"^ LPAREN! expr RPAREN! statement + ; + + + + + + +expr + : assignExpr (options { + /* MBZ: + COMMA is ambiguous between comma expressions and + argument lists. argExprList should get priority, + and it does by being deeper in the expr rule tree + and using (COMMA assignExpr)* + */ + warnWhenFollowAmbig = false; + } : + c:COMMA^ { #c.setType(NCommaExpr); } assignExpr + )* + ; + + +assignExpr + : conditionalExpr ( a:assignOperator! assignExpr { ## = #( #a, ## );} )? + ; + +assignOperator + : ASSIGN + | DIV_ASSIGN + | PLUS_ASSIGN + | MINUS_ASSIGN + | STAR_ASSIGN + | MOD_ASSIGN + | RSHIFT_ASSIGN + | LSHIFT_ASSIGN + | BAND_ASSIGN + | BOR_ASSIGN + | BXOR_ASSIGN + ; + + +conditionalExpr + : logicalOrExpr + ( QUESTION^ expr COLON! conditionalExpr )? + ; + + +constExpr + : conditionalExpr + ; + +logicalOrExpr + : logicalAndExpr ( LOR^ logicalAndExpr )* + ; + + +logicalAndExpr + : inclusiveOrExpr ( LAND^ inclusiveOrExpr )* + ; + +inclusiveOrExpr + : exclusiveOrExpr ( BOR^ exclusiveOrExpr )* + ; + + +exclusiveOrExpr + : bitAndExpr ( BXOR^ bitAndExpr )* + ; + + +bitAndExpr + : equalityExpr ( BAND^ equalityExpr )* + ; + + + +equalityExpr + : relationalExpr + ( ( EQUAL^ | NOT_EQUAL^ ) relationalExpr )* + ; + + +relationalExpr + : shiftExpr + ( ( LT^ | LTE^ | GT^ | GTE^ ) shiftExpr )* + ; + + + +shiftExpr + : additiveExpr + ( ( LSHIFT^ | RSHIFT^ ) additiveExpr )* + ; + + +additiveExpr + : multExpr + ( ( PLUS^ | MINUS^ ) multExpr )* + ; + + +multExpr + : castExpr + ( ( STAR^ | DIV^ | MOD^ ) castExpr )* + ; + + +castExpr + : ( LPAREN typeName RPAREN )=> + LPAREN! typeName RPAREN! ( castExpr ) + { ## = #( #[NCast, "("], ## ); } + + | unaryExpr + ; + + +typeName + : specifierQualifierList (nonemptyAbstractDeclarator)? 
+ ; + +nonemptyAbstractDeclarator + : ( + pointerGroup + ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )* + + | ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )+ + ) + { ## = #( #[NNonemptyAbstractDeclarator], ## ); } + + ; + +/* JTC: + +LR rules: + +abstractDeclarator + : nonemptyAbstractDeclarator + | // null + ; + +nonemptyAbstractDeclarator + : LPAREN nonemptyAbstractDeclarator RPAREN + | abstractDeclarator LPAREN RPAREN + | abstractDeclarator (LBRACKET (expr)? RBRACKET) + | STAR abstractDeclarator + ; +*/ + +unaryExpr + : postfixExpr + | INC^ unaryExpr + | DEC^ unaryExpr + | u:unaryOperator castExpr { ## = #( #[NUnaryExpr], ## ); } + + | "sizeof"^ + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | unaryExpr + ) + ; + + +unaryOperator + : BAND + | STAR + | PLUS + | MINUS + | BNOT + | LNOT + ; + +postfixExpr + : primaryExpr + ( + postfixSuffix {## = #( #[NPostfixExpr], ## );} + )? + ; +postfixSuffix + : + ( PTR ID + | DOT ID + | functionCall + | LBRACKET expr RBRACKET + | INC + | DEC + )+ + ; + +functionCall + : + LPAREN^ (a:argExprList)? RPAREN + { + ##.setType( NFunctionCallArgs ); + } + ; + + +primaryExpr + : ID + | charConst + | intConst + | floatConst + | stringConst + +// JTC: +// ID should catch the enumerator +// leaving it in gives ambiguous err +// | enumerator + | LPAREN! expr RPAREN! { ## = #( #[NExpressionGroup, "("], ## ); } + ; + +argExprList + : assignExpr ( COMMA! 
assignExpr )* + ; + + + +protected +charConst + : CharLiteral + ; + + +protected +stringConst + : (StringLiteral)+ { ## = #(#[NStringSeq], ##); } + ; + + +protected +intConst + : IntOctalConst + | LongOctalConst + | UnsignedOctalConst + | IntIntConst + | LongIntConst + | UnsignedIntConst + | IntHexConst + | LongHexConst + | UnsignedHexConst + ; + + +protected +floatConst + : FloatDoubleConst + | DoubleDoubleConst + | LongDoubleConst + ; + + + + + + +dummy + : NTypedefName + | NInitDecl + | NDeclarator + | NStructDeclarator + | NDeclaration + | NCast + | NPointerGroup + | NExpressionGroup + | NFunctionCallArgs + | NNonemptyAbstractDeclarator + | NInitializer + | NStatementExpr + | NEmptyExpression + | NParameterTypeList + | NFunctionDef + | NCompoundStatement + | NParameterDeclaration + | NCommaExpr + | NUnaryExpr + | NLabel + | NPostfixExpr + | NRangeExpr + | NStringSeq + | NInitializerElementLabel + | NLcurlyInitializer + | NAsmAttribute + | NGnuAsmExpr + | NTypeMissing + ; + + + + + + +{ + import java.io.*; + import antlr.*; +} + +class StdCLexer extends Lexer; + +options + { + k = 3; + exportVocab = STDC; + testLiterals = false; + } + +{ + LineObject lineObject = new LineObject(); + String originalSource = ""; + PreprocessorInfoChannel preprocessorInfoChannel = new PreprocessorInfoChannel(); + int tokenNumber = 0; + boolean countingTokens = true; + int deferredLineCount = 0; + + public void setCountingTokens(boolean ct) + { + countingTokens = ct; + if ( countingTokens ) { + tokenNumber = 0; + } + else { + tokenNumber = 1; + } + } + + public void setOriginalSource(String src) + { + originalSource = src; + lineObject.setSource(src); + } + public void setSource(String src) + { + lineObject.setSource(src); + } + + public PreprocessorInfoChannel getPreprocessorInfoChannel() + { + return preprocessorInfoChannel; + } + + public void setPreprocessingDirective(String pre) + { + preprocessorInfoChannel.addLineForTokenNumber( pre, new Integer(tokenNumber) ); + } + + public 
void addDefine(String name, String value) + { + } + + protected Token makeToken(int t) + { + if ( t != Token.SKIP && countingTokens) { + tokenNumber++; + } + CToken tok = (CToken) super.makeToken(t); + tok.setLine(lineObject.line); + tok.setSource(lineObject.source); + tok.setTokenNumber(tokenNumber); + + lineObject.line += deferredLineCount; + deferredLineCount = 0; + return tok; + } + + public void deferredNewline() { + deferredLineCount++; + } + + public void newline() { + lineObject.newline(); + } + + + + + + +} + +protected +Vocabulary + : '\3'..'\377' + ; + + +/* Operators: */ + +ASSIGN : '=' ; +COLON : ':' ; +COMMA : ',' ; +QUESTION : '?' ; +SEMI : ';' ; +PTR : "->" ; + + +// DOT & VARARGS are commented out since they are generated as part of +// the Number rule below due to some bizarre lexical ambiguity shme. + +// DOT : '.' ; +protected +DOT:; + +// VARARGS : "..." ; +protected +VARARGS:; + + +LPAREN : '(' ; +RPAREN : ')' ; +LBRACKET : '[' ; +RBRACKET : ']' ; +LCURLY : '{' ; +RCURLY : '}' ; + +EQUAL : "==" ; +NOT_EQUAL : "!=" ; +LTE : "<=" ; +LT : "<" ; +GTE : ">=" ; +GT : ">" ; + +DIV : '/' ; +DIV_ASSIGN : "/=" ; +PLUS : '+' ; +PLUS_ASSIGN : "+=" ; +INC : "++" ; +MINUS : '-' ; +MINUS_ASSIGN : "-=" ; +DEC : "--" ; +STAR : '*' ; +STAR_ASSIGN : "*=" ; +MOD : '%' ; +MOD_ASSIGN : "%=" ; +RSHIFT : ">>" ; +RSHIFT_ASSIGN : ">>=" ; +LSHIFT : "<<" ; +LSHIFT_ASSIGN : "<<=" ; + +LAND : "&&" ; +LNOT : '!' ; +LOR : "||" ; + +BAND : '&' ; +BAND_ASSIGN : "&=" ; +BNOT : '~' ; +BOR : '|' ; +BOR_ASSIGN : "|=" ; +BXOR : '^' ; +BXOR_ASSIGN : "^=" ; + + +Whitespace + : ( ( '\003'..'\010' | '\t' | '\013' | '\f' | '\016'.. '\037' | '\177'..'\377' | ' ' ) + | "\r\n" { newline(); } + | ( '\n' | '\r' ) { newline(); } + ) { _ttype = Token.SKIP; } + ; + + +Comment + : "/*" + ( { LA(2) != '/' }? 
'*' + | "\r\n" { deferredNewline(); } + | ( '\r' | '\n' ) { deferredNewline(); } + | ~( '*'| '\r' | '\n' ) + )* + "*/" { _ttype = Token.SKIP; + } + ; + + +CPPComment + : + "//" ( ~('\n') )* + { + _ttype = Token.SKIP; + } + ; + +protected NonWhitespace + : (~('\r' | '\n'))* + ; + + +PREPROC_DIRECTIVE +options { + paraphrase = "a line directive"; +} + + : + '#' + ( ( "line" || (( ' ' | '\t' | '\014')+ '0'..'9')) => LineDirective + | ( (Space)* "define" (Space)* i:ID (Space)* (n:DefineExpr)? + nw:NonWhitespace + ("\r\n" | "\r" | "\n") ) { + if (n != null) { + //System.out.println("addDefine: #define " + i.getText() + " " + n.getText()); + addDefine(i.getText(), n.getText()); + } else { + setPreprocessingDirective("#define " + i.getText() + " " + nw.getText()); + } + } + | (~'\n')* { setPreprocessingDirective(getText()); } + ) + { + _ttype = Token.SKIP; + } + ; + +DefineExpr: + ((LPAREN) (Space)* (DefineExpr2) (Space)* (RPAREN)) | (DefineExpr2) +; + +DefineExpr2: + (Number) + ((Space)* (LSHIFT | RSHIFT | PLUS | MINUS | STAR | DIV | MOD) (Space)* (DefineExpr))? +; + + +protected Space: + ( ' ' | '\t' | '\014') + ; + +protected LineDirective +{ + boolean oldCountingTokens = countingTokens; + countingTokens = false; +} +: + { + lineObject = new LineObject(); + deferredLineCount = 0; + } + ("line")? //this would be for if the directive started "#line", but not there for GNU directives + (Space)+ + n:Number { lineObject.setLine(Integer.parseInt(n.getText())); } + (Space)+ + ( fn:StringLiteral { try { + lineObject.setSource(fn.getText().substring(1,fn.getText().length()-1)); + } + catch (StringIndexOutOfBoundsException e) { /*not possible*/ } + } + | fi:ID { lineObject.setSource(fi.getText()); } + )? + (Space)* + ("1" { lineObject.setEnteringFile(true); } )? + (Space)* + ("2" { lineObject.setReturningToFile(true); } )? + (Space)* + ("3" { lineObject.setSystemHeader(true); } )? + (Space)* + ("4" { lineObject.setTreatAsC(true); } )? 
+ (~('\r' | '\n'))* + ("\r\n" | "\r" | "\n") + { + preprocessorInfoChannel.addLineForTokenNumber(new LineObject(lineObject), new Integer(tokenNumber)); + countingTokens = oldCountingTokens; + } + ; + + + +/* Literals: */ + +/* + * Note that we do NOT handle tri-graphs nor multi-byte sequences. + */ + + +/* + * Note that we can't have empty character constants (even though we + * can have empty strings :-). + */ +CharLiteral + : '\'' ( Escape | ~( '\'' ) ) '\'' + ; + + +/* + * Can't have raw imbedded newlines in string constants. Strict reading of + * the standard gives odd dichotomy between newlines & carriage returns. + * Go figure. + */ +StringLiteral + : '"' + ( Escape + | ( + '\r' { deferredNewline(); } + | '\n' { + deferredNewline(); + _ttype = BadStringLiteral; + } + | '\\' '\n' { + deferredNewline(); + } + ) + | ~( '"' | '\r' | '\n' | '\\' ) + )* + '"' + ; + + +protected BadStringLiteral + : // Imaginary token. + ; + + +/* + * Handle the various escape sequences. + * + * Note carefully that these numeric escape *sequences* are *not* of the + * same form as the C language numeric *constants*. + * + * There is no such thing as a binary numeric escape sequence. + * + * Octal escape sequences are either 1, 2, or 3 octal digits exactly. + * + * There is no such thing as a decimal escape sequence. + * + * Hexadecimal escape sequences are begun with a leading \x and continue + * until a non-hexadecimal character is found. + * + * No real handling of tri-graph sequences, yet. + */ + +protected +Escape + : '\\' + ( options{warnWhenFollowAmbig=false;}: + 'a' + | 'b' + | 'f' + | 'n' + | 'r' + | 't' + | 'v' + | '"' + | '\'' + | '\\' + | '?' + | ('0'..'3') ( options{warnWhenFollowAmbig=false;}: Digit ( options{warnWhenFollowAmbig=false;}: Digit )? )? + | ('4'..'7') ( options{warnWhenFollowAmbig=false;}: Digit )? 
+ | 'x' ( options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F' )+ + ) + ; + + +/* Numeric Constants: */ + +protected +Digit + : '0'..'9' + ; + +protected +LongSuffix + : 'l' + | 'L' + ; + +protected +UnsignedSuffix + : 'u' + | 'U' + ; + +protected +FloatSuffix + : 'f' + | 'F' + ; + +protected +Exponent + : ( 'e' | 'E' ) ( '+' | '-' )? ( Digit )+ + ; + + +protected +DoubleDoubleConst:; + +protected +FloatDoubleConst:; + +protected +LongDoubleConst:; + +protected +IntOctalConst:; + +protected +LongOctalConst:; + +protected +UnsignedOctalConst:; + +protected +IntIntConst:; + +protected +LongIntConst:; + +protected +UnsignedIntConst:; + +protected +IntHexConst:; + +protected +LongHexConst:; + +protected +UnsignedHexConst:; + + + + +Number + : ( ( Digit )+ ( '.' | 'e' | 'E' ) )=> ( Digit )+ + ( '.' ( Digit )* ( Exponent )? + | Exponent + ) { _ttype = DoubleDoubleConst; } + ( FloatSuffix { _ttype = FloatDoubleConst; } + | LongSuffix { _ttype = LongDoubleConst; } + )? + + | ( "..." )=> "..." { _ttype = VARARGS; } + + | '.' { _ttype = DOT; } + ( ( Digit )+ ( Exponent )? + { _ttype = DoubleDoubleConst; } + ( FloatSuffix { _ttype = FloatDoubleConst; } + | LongSuffix { _ttype = LongDoubleConst; } + )? + )? + + | '0' ( '0'..'7' )* { _ttype = IntOctalConst; } + ( LongSuffix { _ttype = LongOctalConst; } + | UnsignedSuffix { _ttype = UnsignedOctalConst; } + )? + + | '1'..'9' ( Digit )* { _ttype = IntIntConst; } + ( LongSuffix { _ttype = LongIntConst; } + | UnsignedSuffix { _ttype = UnsignedIntConst; } + )? + + | '0' ( 'x' | 'X' ) ( 'a'..'f' | 'A'..'F' | Digit )+ + { _ttype = IntHexConst; } + ( LongSuffix { _ttype = LongHexConst; } + | UnsignedSuffix { _ttype = UnsignedHexConst; } + )? 
+ ; + + +ID + options + { + testLiterals = true; + } + : ( 'a'..'z' | 'A'..'Z' | '_' ) + ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' )* + ; + + diff --git a/src/main/antlr/com/jogamp/gluegen/jgram/JavaParser.g b/src/main/antlr/com/jogamp/gluegen/jgram/JavaParser.g new file mode 100644 index 0000000..f67579e --- /dev/null +++ b/src/main/antlr/com/jogamp/gluegen/jgram/JavaParser.g @@ -0,0 +1,1315 @@ +/* Java 1.3 Recognizer + * + * Run 'java Main [-showtree] directory-full-of-java-files' + * + * [The -showtree option pops up a Swing frame that shows + * the AST constructed from the parser.] + * + * Run 'java Main <directory full of java files>' + * + * Contributing authors: + * John Mitchell [email protected] + * Terence Parr [email protected] + * John Lilley [email protected] + * Scott Stanchfield [email protected] + * Markus Mohnen [email protected] + * Peter Williams [email protected] + * Allan Jacobs [email protected] + * Steve Messick [email protected] + * John Pybus [email protected] + * + * Version 1.00 December 9, 1997 -- initial release + * Version 1.01 December 10, 1997 + * fixed bug in octal def (0..7 not 0..8) + * Version 1.10 August 1998 (parrt) + * added tree construction + * fixed definition of WS,comments for mac,pc,unix newlines + * added unary plus + * Version 1.11 (Nov 20, 1998) + * Added "shutup" option to turn off last ambig warning. + * Fixed inner class def to allow named class defs as statements + * synchronized requires compound not simple statement + * add [] after builtInType DOT class in primaryExpression + * "const" is reserved but not valid..removed from modifiers + * Version 1.12 (Feb 2, 1999) + * Changed LITERAL_xxx to xxx in tree grammar. + * Updated java.g to use tokens {...} now for 2.6.0 (new feature). + * + * Version 1.13 (Apr 23, 1999) + * Didn't have (stat)? for else clause in tree parser. + * Didn't gen ASTs for interface extends. Updated tree parser too. + * Updated to 2.6.0. 
+ * Version 1.14 (Jun 20, 1999) + * Allowed final/abstract on local classes. + * Removed local interfaces from methods + * Put instanceof precedence where it belongs...in relationalExpr + * It also had expr not type as arg; fixed it. + * Missing ! on SEMI in classBlock + * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus). + * fixed: didn't like Object[].class in parser or tree parser + * Version 1.15 (Jun 26, 1999) + * Screwed up rule with instanceof in it. :( Fixed. + * Tree parser didn't like (expr).something; fixed. + * Allowed multiple inheritance in tree grammar. oops. + * Version 1.16 (August 22, 1999) + * Extending an interface built a wacky tree: had extra EXTENDS. + * Tree grammar didn't allow multiple superinterfaces. + * Tree grammar didn't allow empty var initializer: {} + * Version 1.17 (October 12, 1999) + * ESC lexer rule allowed 399 max not 377 max. + * java.tree.g didn't handle the expression of synchronized + * statements. + * Version 1.18 (August 12, 2001) + * Terence updated to Java 2 Version 1.3 by + * observing/combining work of Allan Jacobs and Steve + * Messick. Handles 1.3 src. Summary: + * o primary didn't include boolean.class kind of thing + * o constructor calls parsed explicitly now: + * see explicitConstructorInvocation + * o add strictfp modifier + * o missing objBlock after new expression in tree grammar + * o merged local class definition alternatives, moved after declaration + * o fixed problem with ClassName.super.field + * o reordered some alternatives to make things more efficient + * o long and double constants were not differentiated from int/float + * o whitespace rule was inefficient: matched only one char + * o add an examples directory with some nasty 1.3 cases + * o made Main.java use buffered IO and a Reader for Unicode support + * o supports UNICODE? + * Using Unicode charVocabulary makes code file big, but only + * in the bitsets at the end.
I need to make ANTLR generate + * unicode bitsets more efficiently. + * Version 1.19 (April 25, 2002) + * Terence added in nice fixes by John Pybus concerning floating + * constants and problems with super() calls. John did a nice + * reorg of the primary/postfix expression stuff to read better + * and makes f.g.super() parse properly (it was METHOD_CALL not + * a SUPER_CTOR_CALL). Also: + * + * o "finally" clause was a root...made it a child of "try" + * o Added stuff for asserts too for Java 1.4, but *commented out* + * as it is not backward compatible. + * + * Version 1.20 (October 27, 2002) + * + * Terence ended up reorging John Pybus' stuff to + * remove some nondeterminisms and some syntactic predicates. + * Note that the grammar is stricter now; e.g., this(...) must + * be the first statement. + * + * Trinary ?: operator wasn't working as array name: + * (isBig ? bigDigits : digits)[i]; + * + * Checked parser/tree parser on source for + * Resin-2.0.5, jive-2.1.1, jdk 1.3.1, Lucene, antlr 2.7.2a4, + * and the 110k-line jGuru server source. + * + * Version 1.21 (October 17, 2003) + * Fixed lots of problems including: + * Ray Waldin: add typeDefinition to interfaceBlock in java.tree.g + * He found a problem/fix with floating point that start with 0 + * Ray also fixed problem that (int.class) was not recognized. + * Thorsten van Ellen noticed that \n are allowed incorrectly in strings. + * TJP fixed CHAR_LITERAL analogously. + * + * Version 1.22 (April 14, 2004) + * Changed vocab to be ..\uFFFE to avoid -1 char. removed dummy VOCAB rule. 
+ * + * This grammar is in the PUBLIC DOMAIN + */ + +header { + package com.jogamp.gluegen.jgram; + + import java.util.*; + + import antlr.CommonAST; +} + +class JavaParser extends Parser; + +options { + k = 2; // two token lookahead + exportVocab=Java; // Call its vocabulary "Java" + codeGenMakeSwitchThreshold = 2; // Some optimizations + codeGenBitsetTestThreshold = 3; + defaultErrorHandler = false; // Don't generate parser error handlers + buildAST = true; + //buildAST = false; +} + +tokens { + BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF; + INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF; + PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE; + PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP; + POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT; + IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION; + FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract"; + STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL; +} + +{ + public void clearParsedEnumNames() { + enumNames.clear(); + } + + /** Returns the set of names this JavaParser has recorded in enumNames. */ + public Set<String> getParsedEnumNames() { + return enumNames; + } + + /** Clears the set of function names this JavaParser has parsed. + Useful when reusing the same JavaParser for more than one + source file. */ + public void clearParsedFunctionNames() { + functionNames.clear(); + } + + /** Returns the set of function names this JavaParser has parsed. */ + public Set<String> getParsedFunctionNames() { + return functionNames; + } + + private Set<String> functionNames = new HashSet<String>(); + // set of names recorded as enum names while parsing (plain strings, not EnumType objects) + private Set<String> enumNames = new HashSet<String>(); + + private int blockDepth = 0; +} + +// Compilation Unit: In Java, this is a single file.
This is the start +// rule for this parser +compilationUnit + : // A compilation unit starts with an optional package definition + ( packageDefinition + | /* nothing */ + ) + + // Next we have a series of zero or more import statements + ( importDefinition )* + + // Wrapping things up with any number of class or interface + // definitions + ( typeDefinition )* + + EOF! + ; + + +// Package statement: "package" followed by an identifier. +packageDefinition + options {defaultErrorHandler = true;} // let ANTLR handle errors + : p:"package"^ {#p.setType(PACKAGE_DEF);} identifier SEMI! + ; + + +// Import statement: import followed by a package or class name +importDefinition + options {defaultErrorHandler = true;} + : i:"import"^ {#i.setType(IMPORT);} identifierStar SEMI! + ; + +// A type definition in a file is either a class or interface definition. +typeDefinition + options {defaultErrorHandler = true;} + : m:modifiers! + ( classDefinition[#m] + | interfaceDefinition[#m] + ) + | SEMI! + ; + +/** A declaration is the creation of a reference or primitive-type variable + * Create a separate Type/Var tree for each var in the var list. + */ +declaration! + : m:modifiers t:typeSpec[false] v:variableDefinitions[#m,#t] + {#declaration = #v;} + ; + +// A type specification is a type name with possible brackets afterwards +// (which would make it an array type). +typeSpec[boolean addImagNode] + : classTypeSpec[addImagNode] + | builtInTypeSpec[addImagNode] + ; + +// A class type specification is a class type with possible brackets afterwards +// (which would make it an array type). +classTypeSpec[boolean addImagNode] + : identifier (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + { + if ( addImagNode ) { + #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec); + } + } + ; + +// A builtin type specification is a builtin type with possible brackets +// afterwards (which would make it an array type). 
+builtInTypeSpec[boolean addImagNode] + : builtInType (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + { + if ( addImagNode ) { + #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec); + } + } + ; + +// A type name. which is either a (possibly qualified) class name or +// a primitive (builtin) type +type + : identifier + | builtInType + ; + +// The primitive types. +builtInType + : "void" + | "boolean" + | "byte" + | "char" + | "short" + | "int" + | "float" + | "long" + | "double" + ; + +// A (possibly-qualified) java identifier. We start with the first IDENT +// and expand its name by adding dots and following IDENTS +identifier + : IDENT ( DOT^ IDENT )* + ; + +identifierStar + : IDENT + ( DOT^ IDENT )* + ( DOT^ STAR )? + ; + +// A list of zero or more modifiers. We could have used (modifier)* in +// place of a call to modifiers, but I thought it was a good idea to keep +// this rule separate so they can easily be collected in a Vector if +// someone so desires +modifiers + : ( modifier )* + {#modifiers = #([MODIFIERS, "MODIFIERS"], #modifiers);} + ; + +// modifiers for Java classes, interfaces, class/instance vars and methods +modifier + : "private" + | "public" + | "protected" + | "static" + | "transient" + | "final" + | "abstract" + | "native" + | "threadsafe" + | "synchronized" +// | "const" // reserved word, but not valid + | "volatile" + | "strictfp" + ; + +// Definition of a Java class +classDefinition![AST modifiers] + : "class" IDENT + // it _might_ have a superclass... + sc:superClassClause + // it might implement some interfaces... + ic:implementsClause + // now parse the body of the class + cb:classBlock + {#classDefinition = #(#[CLASS_DEF,"CLASS_DEF"], + modifiers,IDENT,sc,ic,cb);} + ; + +superClassClause! + : ( "extends" id:identifier )? 
+ {#superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],id);} + ; + +// Definition of a Java Interface +interfaceDefinition![AST modifiers] + : "interface" IDENT + // it might extend some other interfaces + ie:interfaceExtends + // now parse the body of the interface (looks like a class...) + cb:classBlock + {#interfaceDefinition = #(#[INTERFACE_DEF,"INTERFACE_DEF"], + modifiers,IDENT,ie,cb);} + ; + + +// This is the body of a class. You can have fields and extra semicolons, +// That's about it (until you see what a field is...) +classBlock + : LCURLY! { blockDepth++; } + ( field | SEMI! )* + RCURLY! { blockDepth--; } + {#classBlock = #([OBJBLOCK, "OBJBLOCK"], #classBlock);} + ; + +// An interface can extend several other interfaces... +interfaceExtends + : ( + e:"extends"! + identifier ( COMMA! identifier )* + )? + {#interfaceExtends = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"], + #interfaceExtends);} + ; + +// A class can implement several interfaces... +implementsClause + : ( + i:"implements"! identifier ( COMMA! identifier )* + )? + {#implementsClause = #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"], + #implementsClause);} + ; + +// Now the various things that can be defined inside a class or interface... +// Note that not all of these are really valid in an interface (constructors, +// for example), and if this grammar were used for a compiler there would +// need to be some semantic checks to make sure we're doing the right thing... +field! + : // method, constructor, or variable declaration + mods:modifiers + ( h:ctorHead s:constructorBody // constructor + {#field = #(#[CTOR_DEF,"CTOR_DEF"], mods, h, s);} + + | cd:classDefinition[#mods] // inner class + {#field = #cd;} + + | id:interfaceDefinition[#mods] // inner interface + {#field = #id;} + + | t:typeSpec[false] // method or variable declaration(s) + ( fn:IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! param:parameterDeclarationList RPAREN! 
+ + rt:declaratorBrackets[#t] + + // get the list of exceptions that this method is + // declared to throw + (tc:throwsClause)? + + ( s2:compoundStatement | SEMI ) + {#field = #(#[METHOD_DEF,"METHOD_DEF"], + mods, + #(#[TYPE,"TYPE"],rt), + fn, + param, + tc, + s2); + if(blockDepth==1) { + functionNames.add(fn.getText()); } } + | v:variableDefinitions[#mods,#t] SEMI +// {#field = #(#[VARIABLE_DEF,"VARIABLE_DEF"], v);} + {#field = #v;} + ) + ) + + // "static { ... }" class initializer + | "static" s3:compoundStatement + {#field = #(#[STATIC_INIT,"STATIC_INIT"], s3);} + + // "{ ... }" instance initializer + | s4:compoundStatement + {#field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], s4);} + ; + +constructorBody + : lc:LCURLY^ {#lc.setType(SLIST); blockDepth++; } + ( options { greedy=true; } : explicitConstructorInvocation)? + (statement)* + RCURLY! { blockDepth--; } + ; + +/** Catch obvious constructor calls, but not the expr.super(...) calls */ +explicitConstructorInvocation + : "this"! lp1:LPAREN^ argList RPAREN! SEMI! + {#lp1.setType(CTOR_CALL);} + | "super"! lp2:LPAREN^ argList RPAREN! SEMI! + {#lp2.setType(SUPER_CTOR_CALL);} + ; + +variableDefinitions[AST mods, AST t] + : variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + ( COMMA! + variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + )* + ; + +/** Declaration of a variable. This can be a class/instance variable, + * or a local variable in a method + * It can also include possible initialization. + */ +variableDeclarator![AST mods, AST t] + : id:IDENT d:declaratorBrackets[t] v:varInitializer + {#variableDeclarator = #(#[VARIABLE_DEF,"VARIABLE_DEF"], mods, #(#[TYPE,"TYPE"],d), id, v); + if(blockDepth==1) { + enumNames.add(id.getText()); + } + } + ; + +declaratorBrackets[AST typ] + : {#declaratorBrackets=typ;} + (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + ; + +varInitializer + : ( ASSIGN^ initializer )? 
+ ; + +// This is an initializer used to set up an array. +arrayInitializer + : lc:LCURLY^ {#lc.setType(ARRAY_INIT); blockDepth++; } + ( initializer + ( + // CONFLICT: does a COMMA after an initializer start a new + // initializer or start the option ',' at end? + // ANTLR generates proper code by matching + // the comma as soon as possible. + options { + warnWhenFollowAmbig = false; + } + : + COMMA! initializer + )* + (COMMA!)? + )? + RCURLY! { blockDepth--; } + ; + + +// The two "things" that can initialize an array element are an expression +// and another (nested) array initializer. +initializer + : expression + | arrayInitializer + ; + +// This is the header of a method. It includes the name and parameters +// for the method. +// This also watches for a list of exception classes in a "throws" clause. +ctorHead + : IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! parameterDeclarationList RPAREN! + + // get the list of exceptions that this method is declared to throw + (throwsClause)? + ; + +// This is a list of exception classes that the method is declared to throw +throwsClause + : "throws"^ identifier ( COMMA! identifier )* + ; + + +// A list of formal parameters +parameterDeclarationList + : ( parameterDeclaration ( COMMA! parameterDeclaration )* )? + {#parameterDeclarationList = #(#[PARAMETERS,"PARAMETERS"], + #parameterDeclarationList);} + ; + +// A formal parameter. +parameterDeclaration! + : pm:parameterModifier t:typeSpec[false] id:IDENT + pd:declaratorBrackets[#t] + {#parameterDeclaration = #(#[PARAMETER_DEF,"PARAMETER_DEF"], + pm, #([TYPE,"TYPE"],pd), id);} + ; + +parameterModifier + : (f:"final")? + {#parameterModifier = #(#[MODIFIERS,"MODIFIERS"], f);} + ; + +// Compound statement. 
This is used in many contexts: +// Inside a class definition prefixed with "static": +// it is a class initializer +// Inside a class definition without "static": +// it is an instance initializer +// As the body of a method +// As a completely independent braced block of code inside a method +// it starts a new scope for variable definitions + +compoundStatement + : lc:LCURLY^ {#lc.setType(SLIST); blockDepth++; } + // include the (possibly-empty) list of statements + (statement)* + RCURLY! { blockDepth--; } + ; + + +statement + // A list of statements in curly braces -- start a new scope! + : compoundStatement + + // declarations are ambiguous with "ID DOT" relative to expression + // statements. Must backtrack to be sure. Could use a semantic + // predicate to test symbol table to see what the type was coming + // up, but that's pretty hard without a symbol table ;) + | (declaration)=> declaration SEMI! + + // An expression statement. This could be a method call, + // assignment statement, or any other expression evaluated for + // side-effects. + | expression SEMI! + + // class definition + | m:modifiers! classDefinition[#m] + + // Attach a label to the front of a statement + | IDENT c:COLON^ {#c.setType(LABELED_STAT);} statement + + // If-else statement + | "if"^ LPAREN! expression RPAREN! statement + ( + // CONFLICT: the old "dangling-else" problem... + // ANTLR generates proper code matching + // as soon as possible. Hush warning. + options { + warnWhenFollowAmbig = false; + } + : + "else"! statement + )? + + // For statement + | "for"^ + LPAREN! + forInit SEMI! // initializer + forCond SEMI! // condition test + forIter // updater + RPAREN! + statement // statement to loop over + + // While statement + | "while"^ LPAREN! expression RPAREN! statement + + // do-while statement + | "do"^ statement "while"! LPAREN! expression RPAREN! SEMI! + + // get out of a loop (or switch) + | "break"^ (IDENT)? SEMI! + + // do next iteration of a loop + | "continue"^ (IDENT)? 
SEMI! + + // Return an expression + | "return"^ (expression)? SEMI! + + // switch/case statement + | "switch"^ LPAREN! expression RPAREN! LCURLY! { blockDepth++; } + ( casesGroup )* + RCURLY! { blockDepth--; } + + // exception try-catch block + | tryBlock + + // throw an exception + | "throw"^ expression SEMI! + + // synchronize a statement + | "synchronized"^ LPAREN! expression RPAREN! compoundStatement + + // asserts (uncomment if you want 1.4 compatibility) + // | "assert"^ expression ( COLON! expression )? SEMI! + + // empty statement + | s:SEMI {#s.setType(EMPTY_STAT);} + ; + +casesGroup + : ( // CONFLICT: to which case group do the statements bind? + // ANTLR generates proper code: it groups the + // many "case"/"default" labels together then + // follows them with the statements + options { + greedy = true; + } + : + aCase + )+ + caseSList + {#casesGroup = #([CASE_GROUP, "CASE_GROUP"], #casesGroup);} + ; + +aCase + : ("case"^ expression | "default") COLON! + ; + +caseSList + : (statement)* + {#caseSList = #(#[SLIST,"SLIST"],#caseSList);} + ; + +// The initializer for a for loop +forInit + // if it looks like a declaration, it is + : ( (declaration)=> declaration + // otherwise it could be an expression list... + | expressionList + )? + {#forInit = #(#[FOR_INIT,"FOR_INIT"],#forInit);} + ; + +forCond + : (expression)? + {#forCond = #(#[FOR_CONDITION,"FOR_CONDITION"],#forCond);} + ; + +forIter + : (expressionList)? + {#forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"],#forIter);} + ; + +// an exception handler try/catch block +tryBlock + : "try"^ compoundStatement + (handler)* + ( finallyClause )? + ; + +finallyClause + : "finally"^ compoundStatement + ; + +// an exception handler +handler + : "catch"^ LPAREN! parameterDeclaration RPAREN! 
compoundStatement + ; + + +// expressions +// Note that most of these expressions follow the pattern +// thisLevelExpression : +// nextHigherPrecedenceExpression +// (OPERATOR nextHigherPrecedenceExpression)* +// which is a standard recursive definition for parsing an expression. +// The operators in java have the following precedences: +// lowest (13) = *= /= %= += -= <<= >>= >>>= &= ^= |= +// (12) ?: +// (11) || +// (10) && +// ( 9) | +// ( 8) ^ +// ( 7) & +// ( 6) == != +// ( 5) < <= > >= +// ( 4) << >> +// ( 3) +(binary) -(binary) +// ( 2) * / % +// ( 1) ++ -- +(unary) -(unary) ~ ! (type) +// [] () (method call) . (dot -- identifier qualification) +// new () (explicit parenthesis) +// +// the last two are not usually on a precedence chart; I put them in +// to point out that new has a higher precedence than '.', so you +// can validly use +// new Frame().show() +// +// Note that the above precedence levels map to the rules below... +// Once you have a precedence chart, writing the appropriate rules as below +// is usually very straightforward + + + +// the mother of all expressions +expression + : assignmentExpression + {#expression = #(#[EXPR,"EXPR"],#expression);} + ; + + +// This is a list of expressions. +expressionList + : expression (COMMA! expression)* + {#expressionList = #(#[ELIST,"ELIST"], expressionList);} + ; + + +// assignment expression (level 13) +assignmentExpression + : conditionalExpression + ( ( ASSIGN^ + | PLUS_ASSIGN^ + | MINUS_ASSIGN^ + | STAR_ASSIGN^ + | DIV_ASSIGN^ + | MOD_ASSIGN^ + | SR_ASSIGN^ + | BSR_ASSIGN^ + | SL_ASSIGN^ + | BAND_ASSIGN^ + | BXOR_ASSIGN^ + | BOR_ASSIGN^ + ) + assignmentExpression + )? + ; + + +// conditional test (level 12) +conditionalExpression + : logicalOrExpression + ( QUESTION^ assignmentExpression COLON! conditionalExpression )? 
+    ;
+
+// Precedence ladder: each rule through multiplicativeExpression parses the next rule down as its operand, usually as operand (OP^ operand)*. ANTLR 2 notation: a ^ suffix makes the token the subtree root, a ! suffix leaves it out of the tree.
+// logical or (||) (level 11)
+logicalOrExpression
+    : logicalAndExpression (LOR^ logicalAndExpression)*
+    ;
+
+
+// logical and (&&) (level 10)
+logicalAndExpression
+    : inclusiveOrExpression (LAND^ inclusiveOrExpression)*
+    ;
+
+
+// bitwise or non-short-circuiting or (|) (level 9)
+inclusiveOrExpression
+    : exclusiveOrExpression (BOR^ exclusiveOrExpression)*
+    ;
+
+
+// exclusive or (^) (level 8)
+exclusiveOrExpression
+    : andExpression (BXOR^ andExpression)*
+    ;
+
+
+// bitwise or non-short-circuiting and (&) (level 7)
+andExpression
+    : equalityExpression (BAND^ equalityExpression)*
+    ;
+
+
+// equality/inequality (==/!=) (level 6)
+equalityExpression
+    : relationalExpression ((NOT_EQUAL^ | EQUAL^) relationalExpression)*
+    ;
+
+
+// boolean relational expressions (level 5)
+relationalExpression
+    : shiftExpression
+        ( ( ( LT^
+            | GT^
+            | LE^
+            | GE^
+            )
+            shiftExpression
+          )*
+        | "instanceof"^ typeSpec[true] // alternative to the comparison loop: one instanceof test against a type
+        )
+    ;
+
+
+// bit shift expressions (level 4)
+shiftExpression
+    : additiveExpression ((SL^ | SR^ | BSR^) additiveExpression)*
+    ;
+
+
+// binary addition/subtraction (level 3)
+additiveExpression
+    : multiplicativeExpression ((PLUS^ | MINUS^) multiplicativeExpression)*
+    ;
+
+
+// multiplication/division/modulo (level 2)
+multiplicativeExpression
+    : unaryExpression ((STAR^ | DIV^ | MOD^ ) unaryExpression)*
+    ;
+
+unaryExpression // prefix ++ / --, and unary - / + whose tokens are retyped to UNARY_MINUS / UNARY_PLUS
+    : INC^ unaryExpression
+    | DEC^ unaryExpression
+    | MINUS^ {#MINUS.setType(UNARY_MINUS);} unaryExpression
+    | PLUS^ {#PLUS.setType(UNARY_PLUS);} unaryExpression
+    | unaryExpressionNotPlusMinus
+    ;
+
+unaryExpressionNotPlusMinus // ~ and !, the two cast forms (each guarded by a syntactic predicate), or a postfix expression
+    : BNOT^ unaryExpression
+    | LNOT^ unaryExpression
+
+        // use predicate to skip cases like: (int.class)
+    | (LPAREN builtInTypeSpec[true] RPAREN) =>
+        lpb:LPAREN^ {#lpb.setType(TYPECAST);} builtInTypeSpec[true] RPAREN!
+        unaryExpression
+
+        // Have to backtrack to see if operator follows.  If no operator
+        // follows, it's a typecast.  No semantic checking needed to parse.
+        // if it _looks_ like a cast, it _is_ a cast; else it's a "(expr)"
+    | (LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus)=>
+        lp:LPAREN^ {#lp.setType(TYPECAST);} classTypeSpec[true] RPAREN!
+        unaryExpressionNotPlusMinus
+
+    | postfixExpression
+    ;
+
+// qualified names, array expressions, method invocation, post inc/dec
+postfixExpression
+    :
+    /*
+    "this"! lp1:LPAREN^ argList RPAREN!
+        {#lp1.setType(CTOR_CALL);}
+
+    | "super"! lp2:LPAREN^ argList RPAREN!
+        {#lp2.setType(SUPER_CTOR_CALL);}
+    |
+    */
+        primaryExpression
+
+        (
+            /*
+            options {
+                // the use of postfixExpression in SUPER_CTOR_CALL adds DOT
+                // to the lookahead set, and gives loads of false non-det
+                // warnings.
+                // shut them off.
+                generateAmbigWarnings=false;
+            }
+        : */
+            DOT^ IDENT
+            ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} // '(' is retyped to METHOD_CALL
+                argList
+                RPAREN!
+            )?
+        | DOT^ "this"
+
+        | DOT^ "super"
+            ( // (new Outer()).super() (create enclosing instance)
+                lp3:LPAREN^ argList RPAREN!
+                {#lp3.setType(SUPER_CTOR_CALL);}
+            | DOT^ IDENT
+                ( lps:LPAREN^ {#lps.setType(METHOD_CALL);}
+                    argList
+                    RPAREN!
+                )?
+            )
+        | DOT^ newExpression
+        | lb:LBRACK^ {#lb.setType(INDEX_OP);} expression RBRACK! // '[' is retyped to INDEX_OP
+        )*
+
+        ( // possibly add on a post-increment or post-decrement.
+          // allows INC/DEC on too much, but semantics can check
+            in:INC^ {#in.setType(POST_INC);}
+        | de:DEC^ {#de.setType(POST_DEC);}
+        )?
+    ;
+
+// the basic element of an expression
+primaryExpression
+    : identPrimary ( options {greedy=true;} : DOT^ "class" )?
+    | constant
+    | "true"
+    | "false"
+    | "null"
+    | newExpression
+    | "this"
+    | "super"
+    | LPAREN! assignmentExpression RPAREN! // parentheses are matched but kept out of the tree
+        // look for int.class and int[].class
+    | builtInType
+        ( lbt:LBRACK^ {#lbt.setType(ARRAY_DECLARATOR);} RBRACK! )*
+        DOT^ "class"
+    ;
+
+/** Match a, a.b.c refs, a.b.c(...) refs, a.b.c[], a.b.c[].class,
+ *  and a.b.c.class refs.  Also this(...) and super(...).  Match
+ *  this or super.
+ */
+identPrimary
+    : IDENT
+        (
+            options {
+                // .ident could match here or in postfixExpression.
+                // We do want to match here.  Turn off warning.
+                greedy=true;
+            }
+        : DOT^ IDENT
+        )*
+        (
+            options {
+                // ARRAY_DECLARATOR here conflicts with INDEX_OP in
+                // postfixExpression on LBRACK RBRACK.
+                // We want to match [] here, so greedy.  This overcomes
+                // limitation of linear approximate lookahead.
+                greedy=true;
+            }
+        : ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} argList RPAREN! )
+        | ( options {greedy=true;} :
+            lbc:LBRACK^ {#lbc.setType(ARRAY_DECLARATOR);} RBRACK!
+          )+
+        )?
+    ;
+
+/** object instantiation.
+ *  Trees are built as illustrated by the following input/tree pairs:
+ *
+ *  new T()
+ *
+ *  new
+ *   |
+ *   T --  ELIST
+ *           |
+ *          arg1 -- arg2 -- .. -- argn
+ *
+ *  new int[]
+ *
+ *  new
+ *   |
+ *  int -- ARRAY_DECLARATOR
+ *
+ *  new int[] {1,2}
+ *
+ *  new
+ *   |
+ *  int -- ARRAY_DECLARATOR -- ARRAY_INIT
+ *                                  |
+ *                                EXPR -- EXPR
+ *                                  |      |
+ *                                  1      2
+ *
+ *  new int[3]
+ *  new
+ *   |
+ *  int -- ARRAY_DECLARATOR
+ *                |
+ *              EXPR
+ *                |
+ *                3
+ *
+ *  new int[1][2]
+ *
+ *  new
+ *   |
+ *  int -- ARRAY_DECLARATOR
+ *               |
+ *         ARRAY_DECLARATOR -- EXPR
+ *               |              |
+ *             EXPR             1
+ *               |
+ *               2
+ *
+ */
+newExpression
+    : "new"^ type
+        ( LPAREN! argList RPAREN! (classBlock)?
+
+            //java 1.1
+            // Note: This will allow bad constructs like
+            //    new int[4][][3] {exp,exp}.
+            // There needs to be a semantic check here...
+            // to make sure:
+            //   a) [ expr ] and [ ] are not mixed
+            //   b) [ expr ] and an init are not used together
+
+        | newArrayDeclarator (arrayInitializer)?
+        )
+    ;
+
+argList // always yields a list node: the expressionList result, or a fresh ELIST node when there are no arguments
+    : ( expressionList
+        | /*nothing*/
+            {#argList = #[ELIST,"ELIST"];}
+        )
+    ;
+
+newArrayDeclarator // one or more [ expr? ] groups; each '[' is retyped to ARRAY_DECLARATOR
+    : (
+            // CONFLICT:
+            // newExpression is a primaryExpression which can be
+            // followed by an array index reference.  This is ok,
+            // as the generated code will stay in this loop as
+            // long as it sees an LBRACK (proper behavior)
+            options {
+                warnWhenFollowAmbig = false;
+            }
+        :
+            lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);}
+                (expression)?
+            RBRACK!
+        )+
+    ;
+
+constant // any single literal token: integer, char, string, float, long or double
+    : NUM_INT
+    | CHAR_LITERAL
+    | STRING_LITERAL
+    | NUM_FLOAT
+    | NUM_LONG
+    | NUM_DOUBLE
+    ;
+
+//----------------------------------------------------------------------------
+// The Java scanner
+//----------------------------------------------------------------------------
+class JavaLexer extends Lexer;
+
+options {
+    exportVocab=Java; // call the vocabulary "Java"
+    testLiterals=false; // don't automatically test for literals
+    k=4; // four characters of lookahead
+    charVocabulary='\u0003'..'\u7FFE';
+    // without inlining some bitset tests, couldn't do unicode;
+    // I need to make ANTLR generate smaller bitsets; see
+    // bottom of JavaLexer.java
+    codeGenBitsetTestThreshold=20;
+}
+
+// OPERATORS
+QUESTION : '?' ;
+LPAREN : '(' ;
+RPAREN : ')' ;
+LBRACK : '[' ;
+RBRACK : ']' ;
+LCURLY : '{' ;
+RCURLY : '}' ;
+COLON : ':' ;
+COMMA : ',' ;
+//DOT : '.' ;   (left disabled: the NUM_INT rule below matches '.' itself and sets the token type to DOT)
+ASSIGN : '=' ;
+EQUAL : "==" ;
+LNOT : '!' ;
+BNOT : '~' ;
+NOT_EQUAL : "!=" ;
+DIV : '/' ;
+DIV_ASSIGN : "/=" ;
+PLUS : '+' ;
+PLUS_ASSIGN : "+=" ;
+INC : "++" ;
+MINUS : '-' ;
+MINUS_ASSIGN : "-=" ;
+DEC : "--" ;
+STAR : '*' ;
+STAR_ASSIGN : "*=" ;
+MOD : '%' ;
+MOD_ASSIGN : "%=" ;
+SR : ">>" ;
+SR_ASSIGN : ">>=" ;
+BSR : ">>>" ;
+BSR_ASSIGN : ">>>=" ;
+GE : ">=" ;
+GT : ">" ;
+SL : "<<" ;
+SL_ASSIGN : "<<=" ;
+LE : "<=" ;
+LT : '<' ;
+BXOR : '^' ;
+BXOR_ASSIGN : "^=" ;
+BOR : '|' ;
+BOR_ASSIGN : "|=" ;
+LOR : "||" ;
+BAND : '&' ;
+BAND_ASSIGN : "&=" ;
+LAND : "&&" ;
+SEMI : ';' ;
+
+
+// Whitespace -- ignored
+WS  : ( ' '
+        | '\t'
+        | '\f'
+            // handle newlines
+        | ( options {generateAmbigWarnings=false;}
+            : "\r\n" // Evil DOS
+            | '\r' // Macintosh
+            | '\n' // Unix (the right way)
+            )
+            { newline(); }
+        )+
+        { _ttype = Token.SKIP; }
+    ;
+
+// Single-line comments
+SL_COMMENT
+    : "//"
+        (~('\n'|'\r'))* ('\n'|'\r'('\n')?)? // the line terminator is optional here
+        {$setType(Token.SKIP); newline();} // token is skipped; newline() runs whether or not a terminator was matched
+    ;
+
+// multiple-line comments
+ML_COMMENT
+    : "/*"
+        ( /* '\r' '\n' can be matched in one alternative or by matching
+             '\r' in one iteration and '\n' in another.  I am trying to
+             handle any flavor of newline that comes in, but the language
+             that allows both "\r\n" and "\r" and "\n" to all be valid
+             newline is ambiguous.  Consequently, the resulting grammar
+             must be ambiguous.  I'm shutting this warning off.
+           */
+            options {
+                generateAmbigWarnings=false;
+            }
+        :
+            { LA(2)!='/' }? '*' // a '*' is comment text only when the character after it is not '/'
+        | '\r' '\n' {newline();}
+        | '\r' {newline();}
+        | '\n' {newline();}
+        | ~('*'|'\n'|'\r')
+        )*
+        "*/"
+        {$setType(Token.SKIP);}
+    ;
+
+
+// character literals
+CHAR_LITERAL
+    : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
+    ;
+
+// string literals
+STRING_LITERAL
+    : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
+    ;
+
+
+// escape sequence -- note that this is protected; it can only be called
+// from another lexer rule -- it will not ever directly return a token to
+// the parser
+// There are various ambiguities hushed in this rule.  The optional
+// '0'...'9' digit matches should be matched here rather than letting
+// them go back to STRING_LITERAL to be matched.  ANTLR does the
+// right thing by matching immediately; hence, it's ok to shut off
+// the FOLLOW ambig warnings.
+protected
+ESC
+    : '\\'
+        ( 'n'
+        | 'r'
+        | 't'
+        | 'b'
+        | 'f'
+        | '"'
+        | '\''
+        | '\\'
+        | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT // one or more 'u' followed by exactly four hex digits
+        | '0'..'3' // first digit 0-3: up to two more octal digits may follow
+            (
+                options {
+                    warnWhenFollowAmbig = false;
+                }
+            : '0'..'7'
+                (
+                    options {
+                        warnWhenFollowAmbig = false;
+                    }
+                : '0'..'7'
+                )?
+            )?
+        | '4'..'7' // first digit 4-7: at most one more octal digit may follow
+            (
+                options {
+                    warnWhenFollowAmbig = false;
+                }
+            : '0'..'7'
+            )?
+        )
+    ;
+
+
+// hexadecimal digit (again, note it's protected!)
+protected
+HEX_DIGIT
+    : ('0'..'9'|'A'..'F'|'a'..'f')
+    ;
+
+
+// an identifier.  Note that testLiterals is set to true!  This means
+// that after we match the rule, we look in the literals table to see
+// if it's a literal or really an identifier
+IDENT
+    options {testLiterals=true;}
+    : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
+    ;
+
+
+// a numeric literal
+NUM_INT
+    {boolean isDecimal=false; Token t=null;}
+    : '.' {_ttype = DOT;} // a lone '.' is DOT; if digits follow, the type is replaced by NUM_FLOAT or NUM_DOUBLE below
+        ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})?
+            {
+            if (t != null && t.getText().toUpperCase().indexOf('F')>=0) {
+                _ttype = NUM_FLOAT;
+            }
+            else {
+                _ttype = NUM_DOUBLE; // assume double
+            }
+            }
+        )?
+
+    | ( '0' {isDecimal = true;} // special case for just '0'
+            ( ('x'|'X')
+                ( // hex
+                    // the 'e'|'E' and float suffix stuff look
+                    // like hex digits, hence the (...)+ doesn't
+                    // know when to stop: ambig.  ANTLR resolves
+                    // it correctly by matching immediately.  It
+                    // is therefore ok to hush warning.
+                    options {
+                        warnWhenFollowAmbig=false;
+                    }
+                : HEX_DIGIT
+                )+
+
+            | //float or double with leading zero
+                (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+
+
+            | ('0'..'7')+ // octal
+            )?
+        | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal
+        )
+        ( ('l'|'L') { _ttype = NUM_LONG; } // a trailing l/L turns the token into NUM_LONG
+
+        // only check to see if it's a float if looks like decimal so far
+        | {isDecimal}?
+            ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})?
+            | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})?
+            | f4:FLOAT_SUFFIX {t=f4;}
+            )
+            {
+            if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) {
+                _ttype = NUM_FLOAT;
+            }
+            else {
+                _ttype = NUM_DOUBLE; // assume double
+            }
+            }
+        )?
+    ;
+
+
+// a couple protected methods to assist in matching floating point numbers
+protected
+EXPONENT
+    : ('e'|'E') ('+'|'-')? ('0'..'9')+
+    ;
+
+// suffix letter of a floating-point literal; NUM_INT looks for 'F' in it (case-insensitively) to pick NUM_FLOAT, otherwise NUM_DOUBLE
+
+protected
+FLOAT_SUFFIX
+    : 'f'|'F'|'d'|'D'
+    ;
+ |