summaryrefslogtreecommitdiffstats
path: root/src/java/com/sun/gluegen/jgram
diff options
context:
space:
mode:
Diffstat (limited to 'src/java/com/sun/gluegen/jgram')
-rw-r--r--src/java/com/sun/gluegen/jgram/JavaParser.g1315
-rw-r--r--src/java/com/sun/gluegen/jgram/Test.java132
2 files changed, 1447 insertions, 0 deletions
diff --git a/src/java/com/sun/gluegen/jgram/JavaParser.g b/src/java/com/sun/gluegen/jgram/JavaParser.g
new file mode 100644
index 0000000..e3a1962
--- /dev/null
+++ b/src/java/com/sun/gluegen/jgram/JavaParser.g
@@ -0,0 +1,1315 @@
+/* Java 1.3 Recognizer
+ *
+ * Run 'java Main [-showtree] directory-full-of-java-files'
+ *
+ * [The -showtree option pops up a Swing frame that shows
+ * the AST constructed from the parser.]
+ *
+ * Run 'java Main <directory full of java files>'
+ *
+ * Contributing authors:
+ * John Mitchell [email protected]
+ * Terence Parr [email protected]
+ * John Lilley [email protected]
+ * Scott Stanchfield [email protected]
+ * Markus Mohnen [email protected]
+ * Peter Williams [email protected]
+ * Allan Jacobs [email protected]
+ * Steve Messick [email protected]
+ * John Pybus [email protected]
+ *
+ * Version 1.00 December 9, 1997 -- initial release
+ * Version 1.01 December 10, 1997
+ * fixed bug in octal def (0..7 not 0..8)
+ * Version 1.10 August 1998 (parrt)
+ * added tree construction
+ * fixed definition of WS,comments for mac,pc,unix newlines
+ * added unary plus
+ * Version 1.11 (Nov 20, 1998)
+ * Added "shutup" option to turn off last ambig warning.
+ * Fixed inner class def to allow named class defs as statements
+ * synchronized requires compound not simple statement
+ * add [] after builtInType DOT class in primaryExpression
+ * "const" is reserved but not valid..removed from modifiers
+ * Version 1.12 (Feb 2, 1999)
+ * Changed LITERAL_xxx to xxx in tree grammar.
+ * Updated java.g to use tokens {...} now for 2.6.0 (new feature).
+ *
+ * Version 1.13 (Apr 23, 1999)
+ * Didn't have (stat)? for else clause in tree parser.
+ * Didn't gen ASTs for interface extends. Updated tree parser too.
+ * Updated to 2.6.0.
+ * Version 1.14 (Jun 20, 1999)
+ * Allowed final/abstract on local classes.
+ * Removed local interfaces from methods
+ * Put instanceof precedence where it belongs...in relationalExpr
+ * It also had expr not type as arg; fixed it.
+ * Missing ! on SEMI in classBlock
+ * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus).
+ * fixed: didn't like Object[].class in parser or tree parser
+ * Version 1.15 (Jun 26, 1999)
+ * Screwed up rule with instanceof in it. :( Fixed.
+ * Tree parser didn't like (expr).something; fixed.
+ * Allowed multiple inheritance in tree grammar. oops.
+ * Version 1.16 (August 22, 1999)
+ * Extending an interface built a wacky tree: had extra EXTENDS.
+ * Tree grammar didn't allow multiple superinterfaces.
+ * Tree grammar didn't allow empty var initializer: {}
+ * Version 1.17 (October 12, 1999)
+ * ESC lexer rule allowed 399 max not 377 max.
+ * java.tree.g didn't handle the expression of synchronized
+ * statements.
+ * Version 1.18 (August 12, 2001)
+ * Terence updated to Java 2 Version 1.3 by
+ * observing/combining work of Allan Jacobs and Steve
+ * Messick. Handles 1.3 src. Summary:
+ * o primary didn't include boolean.class kind of thing
+ * o constructor calls parsed explicitly now:
+ * see explicitConstructorInvocation
+ * o add strictfp modifier
+ * o missing objBlock after new expression in tree grammar
+ * o merged local class definition alternatives, moved after declaration
+ * o fixed problem with ClassName.super.field
+ * o reordered some alternatives to make things more efficient
+ * o long and double constants were not differentiated from int/float
+ * o whitespace rule was inefficient: matched only one char
+ * o add an examples directory with some nasty 1.3 cases
+ * o made Main.java use buffered IO and a Reader for Unicode support
+ * o supports UNICODE?
+ * Using Unicode charVocabulay makes code file big, but only
+ * in the bitsets at the end. I need to make ANTLR generate
+ * unicode bitsets more efficiently.
+ * Version 1.19 (April 25, 2002)
+ * Terence added in nice fixes by John Pybus concerning floating
+ * constants and problems with super() calls. John did a nice
+ * reorg of the primary/postfix expression stuff to read better
+ * and makes f.g.super() parse properly (it was METHOD_CALL not
+ * a SUPER_CTOR_CALL). Also:
+ *
+ * o "finally" clause was a root...made it a child of "try"
+ * o Added stuff for asserts too for Java 1.4, but *commented out*
+ * as it is not backward compatible.
+ *
+ * Version 1.20 (October 27, 2002)
+ *
+ * Terence ended up reorging John Pybus' stuff to
+ * remove some nondeterminisms and some syntactic predicates.
+ * Note that the grammar is stricter now; e.g., this(...) must
+ * be the first statement.
+ *
+ * Trinary ?: operator wasn't working as array name:
+ * (isBig ? bigDigits : digits)[i];
+ *
+ * Checked parser/tree parser on source for
+ * Resin-2.0.5, jive-2.1.1, jdk 1.3.1, Lucene, antlr 2.7.2a4,
+ * and the 110k-line jGuru server source.
+ *
+ * Version 1.21 (October 17, 2003)
+ * Fixed lots of problems including:
+ * Ray Waldin: add typeDefinition to interfaceBlock in java.tree.g
+ * He found a problem/fix with floating point that start with 0
+ * Ray also fixed problem that (int.class) was not recognized.
+ * Thorsten van Ellen noticed that \n are allowed incorrectly in strings.
+ * TJP fixed CHAR_LITERAL analogously.
+ *
+ * Version 1.22 (April 14, 2004)
+ * Changed vocab to be ..\uFFFE to avoid -1 char. removed dummy VOCAB rule.
+ *
+ * This grammar is in the PUBLIC DOMAIN
+ */
+
+header {
+ package com.sun.gluegen.jgram;
+
+ import java.util.*;
+
+ import antlr.CommonAST;
+}
+
+class JavaParser extends Parser;
+
+options {
+ k = 2; // two token lookahead
+ exportVocab=Java; // Call its vocabulary "Java"
+ codeGenMakeSwitchThreshold = 2; // Some optimizations
+ codeGenBitsetTestThreshold = 3;
+ defaultErrorHandler = false; // Don't generate parser error handlers
+ buildAST = true;
+ //buildAST = false;
+}
+
+tokens {
+ BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF;
+ INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF;
+ PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE;
+ PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP;
+ POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT;
+ IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION;
+ FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract";
+ STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL;
+}
+
+{
+ public void clearParsedEnumNames() {
+ enumNames.clear();
+ }
+
+ /** Returns the EnumTypes this HeaderParser processed. */
+ public Set getParsedEnumNames() {
+ return enumNames;
+ }
+
+ /** Clears the list of functions this HeaderParser has parsed.
+ Useful when reusing the same HeaderParser for more than one
+ header file. */
+ public void clearParsedFunctionNames() {
+ functionNames.clear();
+ }
+
+ /** Returns the list of FunctionSymbols this HeaderParser has parsed. */
+ public Set getParsedFunctionNames() {
+ return functionNames;
+ }
+
+ private Set/*<String>*/ functionNames = new HashSet();
+ // hash from name of an enumerated value to the EnumType to which it belongs
+ private Set/*<String>*/ enumNames = new HashSet();
+
+ private int blockDepth = 0;
+}
+
+// Compilation Unit: In Java, this is a single file. This is the start
+// rule for this parser
+compilationUnit
+ : // A compilation unit starts with an optional package definition
+ ( packageDefinition
+ | /* nothing */
+ )
+
+ // Next we have a series of zero or more import statements
+ ( importDefinition )*
+
+ // Wrapping things up with any number of class or interface
+ // definitions
+ ( typeDefinition )*
+
+ EOF!
+ ;
+
+
+// Package statement: "package" followed by an identifier.
+packageDefinition
+ options {defaultErrorHandler = true;} // let ANTLR handle errors
+ : p:"package"^ {#p.setType(PACKAGE_DEF);} identifier SEMI!
+ ;
+
+
+// Import statement: import followed by a package or class name
+importDefinition
+ options {defaultErrorHandler = true;}
+ : i:"import"^ {#i.setType(IMPORT);} identifierStar SEMI!
+ ;
+
+// A type definition in a file is either a class or interface definition.
+typeDefinition
+ options {defaultErrorHandler = true;}
+ : m:modifiers!
+ ( classDefinition[#m]
+ | interfaceDefinition[#m]
+ )
+ | SEMI!
+ ;
+
+/** A declaration is the creation of a reference or primitive-type variable
+ * Create a separate Type/Var tree for each var in the var list.
+ */
+declaration!
+ : m:modifiers t:typeSpec[false] v:variableDefinitions[#m,#t]
+ {#declaration = #v;}
+ ;
+
+// A type specification is a type name with possible brackets afterwards
+// (which would make it an array type).
+typeSpec[boolean addImagNode]
+ : classTypeSpec[addImagNode]
+ | builtInTypeSpec[addImagNode]
+ ;
+
+// A class type specification is a class type with possible brackets afterwards
+// (which would make it an array type).
+classTypeSpec[boolean addImagNode]
+ : identifier (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
+ {
+ if ( addImagNode ) {
+ #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec);
+ }
+ }
+ ;
+
+// A builtin type specification is a builtin type with possible brackets
+// afterwards (which would make it an array type).
+builtInTypeSpec[boolean addImagNode]
+ : builtInType (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
+ {
+ if ( addImagNode ) {
+ #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec);
+ }
+ }
+ ;
+
+// A type name. which is either a (possibly qualified) class name or
+// a primitive (builtin) type
+type
+ : identifier
+ | builtInType
+ ;
+
+// The primitive types.
+builtInType
+ : "void"
+ | "boolean"
+ | "byte"
+ | "char"
+ | "short"
+ | "int"
+ | "float"
+ | "long"
+ | "double"
+ ;
+
+// A (possibly-qualified) java identifier. We start with the first IDENT
+// and expand its name by adding dots and following IDENTS
+identifier
+ : IDENT ( DOT^ IDENT )*
+ ;
+
+identifierStar
+ : IDENT
+ ( DOT^ IDENT )*
+ ( DOT^ STAR )?
+ ;
+
+// A list of zero or more modifiers. We could have used (modifier)* in
+// place of a call to modifiers, but I thought it was a good idea to keep
+// this rule separate so they can easily be collected in a Vector if
+// someone so desires
+modifiers
+ : ( modifier )*
+ {#modifiers = #([MODIFIERS, "MODIFIERS"], #modifiers);}
+ ;
+
+// modifiers for Java classes, interfaces, class/instance vars and methods
+modifier
+ : "private"
+ | "public"
+ | "protected"
+ | "static"
+ | "transient"
+ | "final"
+ | "abstract"
+ | "native"
+ | "threadsafe"
+ | "synchronized"
+// | "const" // reserved word, but not valid
+ | "volatile"
+ | "strictfp"
+ ;
+
+// Definition of a Java class
+classDefinition![AST modifiers]
+ : "class" IDENT
+ // it _might_ have a superclass...
+ sc:superClassClause
+ // it might implement some interfaces...
+ ic:implementsClause
+ // now parse the body of the class
+ cb:classBlock
+ {#classDefinition = #(#[CLASS_DEF,"CLASS_DEF"],
+ modifiers,IDENT,sc,ic,cb);}
+ ;
+
+superClassClause!
+ : ( "extends" id:identifier )?
+ {#superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],id);}
+ ;
+
+// Definition of a Java Interface
+interfaceDefinition![AST modifiers]
+ : "interface" IDENT
+ // it might extend some other interfaces
+ ie:interfaceExtends
+ // now parse the body of the interface (looks like a class...)
+ cb:classBlock
+ {#interfaceDefinition = #(#[INTERFACE_DEF,"INTERFACE_DEF"],
+ modifiers,IDENT,ie,cb);}
+ ;
+
+
+// This is the body of a class. You can have fields and extra semicolons,
+// That's about it (until you see what a field is...)
+classBlock
+ : LCURLY! { blockDepth++; }
+ ( field | SEMI! )*
+ RCURLY! { blockDepth--; }
+ {#classBlock = #([OBJBLOCK, "OBJBLOCK"], #classBlock);}
+ ;
+
+// An interface can extend several other interfaces...
+interfaceExtends
+ : (
+ e:"extends"!
+ identifier ( COMMA! identifier )*
+ )?
+ {#interfaceExtends = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],
+ #interfaceExtends);}
+ ;
+
+// A class can implement several interfaces...
+implementsClause
+ : (
+ i:"implements"! identifier ( COMMA! identifier )*
+ )?
+ {#implementsClause = #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"],
+ #implementsClause);}
+ ;
+
+// Now the various things that can be defined inside a class or interface...
+// Note that not all of these are really valid in an interface (constructors,
+// for example), and if this grammar were used for a compiler there would
+// need to be some semantic checks to make sure we're doing the right thing...
+field!
+ : // method, constructor, or variable declaration
+ mods:modifiers
+ ( h:ctorHead s:constructorBody // constructor
+ {#field = #(#[CTOR_DEF,"CTOR_DEF"], mods, h, s);}
+
+ | cd:classDefinition[#mods] // inner class
+ {#field = #cd;}
+
+ | id:interfaceDefinition[#mods] // inner interface
+ {#field = #id;}
+
+ | t:typeSpec[false] // method or variable declaration(s)
+ ( fn:IDENT // the name of the method
+
+ // parse the formal parameter declarations.
+ LPAREN! param:parameterDeclarationList RPAREN!
+
+ rt:declaratorBrackets[#t]
+
+ // get the list of exceptions that this method is
+ // declared to throw
+ (tc:throwsClause)?
+
+ ( s2:compoundStatement | SEMI )
+ {#field = #(#[METHOD_DEF,"METHOD_DEF"],
+ mods,
+ #(#[TYPE,"TYPE"],rt),
+ fn,
+ param,
+ tc,
+ s2);
+ if(blockDepth==1) {
+ functionNames.add(fn.getText()); } }
+ | v:variableDefinitions[#mods,#t] SEMI
+// {#field = #(#[VARIABLE_DEF,"VARIABLE_DEF"], v);}
+ {#field = #v;}
+ )
+ )
+
+ // "static { ... }" class initializer
+ | "static" s3:compoundStatement
+ {#field = #(#[STATIC_INIT,"STATIC_INIT"], s3);}
+
+ // "{ ... }" instance initializer
+ | s4:compoundStatement
+ {#field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], s4);}
+ ;
+
+constructorBody
+ : lc:LCURLY^ {#lc.setType(SLIST); blockDepth++; }
+ ( options { greedy=true; } : explicitConstructorInvocation)?
+ (statement)*
+ RCURLY! { blockDepth--; }
+ ;
+
+/** Catch obvious constructor calls, but not the expr.super(...) calls */
+explicitConstructorInvocation
+ : "this"! lp1:LPAREN^ argList RPAREN! SEMI!
+ {#lp1.setType(CTOR_CALL);}
+ | "super"! lp2:LPAREN^ argList RPAREN! SEMI!
+ {#lp2.setType(SUPER_CTOR_CALL);}
+ ;
+
+variableDefinitions[AST mods, AST t]
+ : variableDeclarator[getASTFactory().dupTree(mods),
+ getASTFactory().dupTree(t)]
+ ( COMMA!
+ variableDeclarator[getASTFactory().dupTree(mods),
+ getASTFactory().dupTree(t)]
+ )*
+ ;
+
+/** Declaration of a variable. This can be a class/instance variable,
+ * or a local variable in a method
+ * It can also include possible initialization.
+ */
+variableDeclarator![AST mods, AST t]
+ : id:IDENT d:declaratorBrackets[t] v:varInitializer
+ {#variableDeclarator = #(#[VARIABLE_DEF,"VARIABLE_DEF"], mods, #(#[TYPE,"TYPE"],d), id, v);
+ if(blockDepth==1) {
+ enumNames.add(id.getText());
+ }
+ }
+ ;
+
+declaratorBrackets[AST typ]
+ : {#declaratorBrackets=typ;}
+ (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
+ ;
+
+varInitializer
+ : ( ASSIGN^ initializer )?
+ ;
+
+// This is an initializer used to set up an array.
+arrayInitializer
+ : lc:LCURLY^ {#lc.setType(ARRAY_INIT); blockDepth++; }
+ ( initializer
+ (
+ // CONFLICT: does a COMMA after an initializer start a new
+ // initializer or start the option ',' at end?
+ // ANTLR generates proper code by matching
+ // the comma as soon as possible.
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ :
+ COMMA! initializer
+ )*
+ (COMMA!)?
+ )?
+ RCURLY! { blockDepth--; }
+ ;
+
+
+// The two "things" that can initialize an array element are an expression
+// and another (nested) array initializer.
+initializer
+ : expression
+ | arrayInitializer
+ ;
+
+// This is the header of a method. It includes the name and parameters
+// for the method.
+// This also watches for a list of exception classes in a "throws" clause.
+ctorHead
+ : IDENT // the name of the method
+
+ // parse the formal parameter declarations.
+ LPAREN! parameterDeclarationList RPAREN!
+
+ // get the list of exceptions that this method is declared to throw
+ (throwsClause)?
+ ;
+
+// This is a list of exception classes that the method is declared to throw
+throwsClause
+ : "throws"^ identifier ( COMMA! identifier )*
+ ;
+
+
+// A list of formal parameters
+parameterDeclarationList
+ : ( parameterDeclaration ( COMMA! parameterDeclaration )* )?
+ {#parameterDeclarationList = #(#[PARAMETERS,"PARAMETERS"],
+ #parameterDeclarationList);}
+ ;
+
+// A formal parameter.
+parameterDeclaration!
+ : pm:parameterModifier t:typeSpec[false] id:IDENT
+ pd:declaratorBrackets[#t]
+ {#parameterDeclaration = #(#[PARAMETER_DEF,"PARAMETER_DEF"],
+ pm, #([TYPE,"TYPE"],pd), id);}
+ ;
+
+parameterModifier
+ : (f:"final")?
+ {#parameterModifier = #(#[MODIFIERS,"MODIFIERS"], f);}
+ ;
+
+// Compound statement. This is used in many contexts:
+// Inside a class definition prefixed with "static":
+// it is a class initializer
+// Inside a class definition without "static":
+// it is an instance initializer
+// As the body of a method
+// As a completely indepdent braced block of code inside a method
+// it starts a new scope for variable definitions
+
+compoundStatement
+ : lc:LCURLY^ {#lc.setType(SLIST); blockDepth++; }
+ // include the (possibly-empty) list of statements
+ (statement)*
+ RCURLY! { blockDepth--; }
+ ;
+
+
+statement
+ // A list of statements in curly braces -- start a new scope!
+ : compoundStatement
+
+ // declarations are ambiguous with "ID DOT" relative to expression
+ // statements. Must backtrack to be sure. Could use a semantic
+ // predicate to test symbol table to see what the type was coming
+ // up, but that's pretty hard without a symbol table ;)
+ | (declaration)=> declaration SEMI!
+
+ // An expression statement. This could be a method call,
+ // assignment statement, or any other expression evaluated for
+ // side-effects.
+ | expression SEMI!
+
+ // class definition
+ | m:modifiers! classDefinition[#m]
+
+ // Attach a label to the front of a statement
+ | IDENT c:COLON^ {#c.setType(LABELED_STAT);} statement
+
+ // If-else statement
+ | "if"^ LPAREN! expression RPAREN! statement
+ (
+ // CONFLICT: the old "dangling-else" problem...
+ // ANTLR generates proper code matching
+ // as soon as possible. Hush warning.
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ :
+ "else"! statement
+ )?
+
+ // For statement
+ | "for"^
+ LPAREN!
+ forInit SEMI! // initializer
+ forCond SEMI! // condition test
+ forIter // updater
+ RPAREN!
+ statement // statement to loop over
+
+ // While statement
+ | "while"^ LPAREN! expression RPAREN! statement
+
+ // do-while statement
+ | "do"^ statement "while"! LPAREN! expression RPAREN! SEMI!
+
+ // get out of a loop (or switch)
+ | "break"^ (IDENT)? SEMI!
+
+ // do next iteration of a loop
+ | "continue"^ (IDENT)? SEMI!
+
+ // Return an expression
+ | "return"^ (expression)? SEMI!
+
+ // switch/case statement
+ | "switch"^ LPAREN! expression RPAREN! LCURLY! { blockDepth++; }
+ ( casesGroup )*
+ RCURLY! { blockDepth--; }
+
+ // exception try-catch block
+ | tryBlock
+
+ // throw an exception
+ | "throw"^ expression SEMI!
+
+ // synchronize a statement
+ | "synchronized"^ LPAREN! expression RPAREN! compoundStatement
+
+ // asserts (uncomment if you want 1.4 compatibility)
+ // | "assert"^ expression ( COLON! expression )? SEMI!
+
+ // empty statement
+ | s:SEMI {#s.setType(EMPTY_STAT);}
+ ;
+
+casesGroup
+ : ( // CONFLICT: to which case group do the statements bind?
+ // ANTLR generates proper code: it groups the
+ // many "case"/"default" labels together then
+ // follows them with the statements
+ options {
+ greedy = true;
+ }
+ :
+ aCase
+ )+
+ caseSList
+ {#casesGroup = #([CASE_GROUP, "CASE_GROUP"], #casesGroup);}
+ ;
+
+aCase
+ : ("case"^ expression | "default") COLON!
+ ;
+
+caseSList
+ : (statement)*
+ {#caseSList = #(#[SLIST,"SLIST"],#caseSList);}
+ ;
+
+// The initializer for a for loop
+forInit
+ // if it looks like a declaration, it is
+ : ( (declaration)=> declaration
+ // otherwise it could be an expression list...
+ | expressionList
+ )?
+ {#forInit = #(#[FOR_INIT,"FOR_INIT"],#forInit);}
+ ;
+
+forCond
+ : (expression)?
+ {#forCond = #(#[FOR_CONDITION,"FOR_CONDITION"],#forCond);}
+ ;
+
+forIter
+ : (expressionList)?
+ {#forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"],#forIter);}
+ ;
+
+// an exception handler try/catch block
+tryBlock
+ : "try"^ compoundStatement
+ (handler)*
+ ( finallyClause )?
+ ;
+
+finallyClause
+ : "finally"^ compoundStatement
+ ;
+
+// an exception handler
+handler
+ : "catch"^ LPAREN! parameterDeclaration RPAREN! compoundStatement
+ ;
+
+
+// expressions
+// Note that most of these expressions follow the pattern
+// thisLevelExpression :
+// nextHigherPrecedenceExpression
+// (OPERATOR nextHigherPrecedenceExpression)*
+// which is a standard recursive definition for a parsing an expression.
+// The operators in java have the following precedences:
+// lowest (13) = *= /= %= += -= <<= >>= >>>= &= ^= |=
+// (12) ?:
+// (11) ||
+// (10) &&
+// ( 9) |
+// ( 8) ^
+// ( 7) &
+// ( 6) == !=
+// ( 5) < <= > >=
+// ( 4) << >>
+// ( 3) +(binary) -(binary)
+// ( 2) * / %
+// ( 1) ++ -- +(unary) -(unary) ~ ! (type)
+// [] () (method call) . (dot -- identifier qualification)
+// new () (explicit parenthesis)
+//
+// the last two are not usually on a precedence chart; I put them in
+// to point out that new has a higher precedence than '.', so you
+// can validy use
+// new Frame().show()
+//
+// Note that the above precedence levels map to the rules below...
+// Once you have a precedence chart, writing the appropriate rules as below
+// is usually very straightfoward
+
+
+
+// the mother of all expressions
+expression
+ : assignmentExpression
+ {#expression = #(#[EXPR,"EXPR"],#expression);}
+ ;
+
+
+// This is a list of expressions.
+expressionList
+ : expression (COMMA! expression)*
+ {#expressionList = #(#[ELIST,"ELIST"], expressionList);}
+ ;
+
+
+// assignment expression (level 13)
+assignmentExpression
+ : conditionalExpression
+ ( ( ASSIGN^
+ | PLUS_ASSIGN^
+ | MINUS_ASSIGN^
+ | STAR_ASSIGN^
+ | DIV_ASSIGN^
+ | MOD_ASSIGN^
+ | SR_ASSIGN^
+ | BSR_ASSIGN^
+ | SL_ASSIGN^
+ | BAND_ASSIGN^
+ | BXOR_ASSIGN^
+ | BOR_ASSIGN^
+ )
+ assignmentExpression
+ )?
+ ;
+
+
+// conditional test (level 12)
+conditionalExpression
+ : logicalOrExpression
+ ( QUESTION^ assignmentExpression COLON! conditionalExpression )?
+ ;
+
+
+// logical or (||) (level 11)
+logicalOrExpression
+ : logicalAndExpression (LOR^ logicalAndExpression)*
+ ;
+
+
+// logical and (&&) (level 10)
+logicalAndExpression
+ : inclusiveOrExpression (LAND^ inclusiveOrExpression)*
+ ;
+
+
+// bitwise or non-short-circuiting or (|) (level 9)
+inclusiveOrExpression
+ : exclusiveOrExpression (BOR^ exclusiveOrExpression)*
+ ;
+
+
+// exclusive or (^) (level 8)
+exclusiveOrExpression
+ : andExpression (BXOR^ andExpression)*
+ ;
+
+
+// bitwise or non-short-circuiting and (&) (level 7)
+andExpression
+ : equalityExpression (BAND^ equalityExpression)*
+ ;
+
+
+// equality/inequality (==/!=) (level 6)
+equalityExpression
+ : relationalExpression ((NOT_EQUAL^ | EQUAL^) relationalExpression)*
+ ;
+
+
+// boolean relational expressions (level 5)
+relationalExpression
+ : shiftExpression
+ ( ( ( LT^
+ | GT^
+ | LE^
+ | GE^
+ )
+ shiftExpression
+ )*
+ | "instanceof"^ typeSpec[true]
+ )
+ ;
+
+
+// bit shift expressions (level 4)
+shiftExpression
+ : additiveExpression ((SL^ | SR^ | BSR^) additiveExpression)*
+ ;
+
+
+// binary addition/subtraction (level 3)
+additiveExpression
+ : multiplicativeExpression ((PLUS^ | MINUS^) multiplicativeExpression)*
+ ;
+
+
+// multiplication/division/modulo (level 2)
+multiplicativeExpression
+ : unaryExpression ((STAR^ | DIV^ | MOD^ ) unaryExpression)*
+ ;
+
+unaryExpression
+ : INC^ unaryExpression
+ | DEC^ unaryExpression
+ | MINUS^ {#MINUS.setType(UNARY_MINUS);} unaryExpression
+ | PLUS^ {#PLUS.setType(UNARY_PLUS);} unaryExpression
+ | unaryExpressionNotPlusMinus
+ ;
+
+unaryExpressionNotPlusMinus
+ : BNOT^ unaryExpression
+ | LNOT^ unaryExpression
+
+ // use predicate to skip cases like: (int.class)
+ | (LPAREN builtInTypeSpec[true] RPAREN) =>
+ lpb:LPAREN^ {#lpb.setType(TYPECAST);} builtInTypeSpec[true] RPAREN!
+ unaryExpression
+
+ // Have to backtrack to see if operator follows. If no operator
+ // follows, it's a typecast. No semantic checking needed to parse.
+ // if it _looks_ like a cast, it _is_ a cast; else it's a "(expr)"
+ | (LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus)=>
+ lp:LPAREN^ {#lp.setType(TYPECAST);} classTypeSpec[true] RPAREN!
+ unaryExpressionNotPlusMinus
+
+ | postfixExpression
+ ;
+
+// qualified names, array expressions, method invocation, post inc/dec
+postfixExpression
+ :
+ /*
+ "this"! lp1:LPAREN^ argList RPAREN!
+ {#lp1.setType(CTOR_CALL);}
+
+ | "super"! lp2:LPAREN^ argList RPAREN!
+ {#lp2.setType(SUPER_CTOR_CALL);}
+ |
+ */
+ primaryExpression
+
+ (
+ /*
+ options {
+ // the use of postfixExpression in SUPER_CTOR_CALL adds DOT
+ // to the lookahead set, and gives loads of false non-det
+ // warnings.
+ // shut them off.
+ generateAmbigWarnings=false;
+ }
+ : */
+ DOT^ IDENT
+ ( lp:LPAREN^ {#lp.setType(METHOD_CALL);}
+ argList
+ RPAREN!
+ )?
+ | DOT^ "this"
+
+ | DOT^ "super"
+ ( // (new Outer()).super() (create enclosing instance)
+ lp3:LPAREN^ argList RPAREN!
+ {#lp3.setType(SUPER_CTOR_CALL);}
+ | DOT^ IDENT
+ ( lps:LPAREN^ {#lps.setType(METHOD_CALL);}
+ argList
+ RPAREN!
+ )?
+ )
+ | DOT^ newExpression
+ | lb:LBRACK^ {#lb.setType(INDEX_OP);} expression RBRACK!
+ )*
+
+ ( // possibly add on a post-increment or post-decrement.
+ // allows INC/DEC on too much, but semantics can check
+ in:INC^ {#in.setType(POST_INC);}
+ | de:DEC^ {#de.setType(POST_DEC);}
+ )?
+ ;
+
+// the basic element of an expression
+primaryExpression
+ : identPrimary ( options {greedy=true;} : DOT^ "class" )?
+ | constant
+ | "true"
+ | "false"
+ | "null"
+ | newExpression
+ | "this"
+ | "super"
+ | LPAREN! assignmentExpression RPAREN!
+ // look for int.class and int[].class
+ | builtInType
+ ( lbt:LBRACK^ {#lbt.setType(ARRAY_DECLARATOR);} RBRACK! )*
+ DOT^ "class"
+ ;
+
+/** Match a, a.b.c refs, a.b.c(...) refs, a.b.c[], a.b.c[].class,
+ * and a.b.c.class refs. Also this(...) and super(...). Match
+ * this or super.
+ */
+identPrimary
+ : IDENT
+ (
+ options {
+ // .ident could match here or in postfixExpression.
+ // We do want to match here. Turn off warning.
+ greedy=true;
+ }
+ : DOT^ IDENT
+ )*
+ (
+ options {
+ // ARRAY_DECLARATOR here conflicts with INDEX_OP in
+ // postfixExpression on LBRACK RBRACK.
+ // We want to match [] here, so greedy. This overcomes
+ // limitation of linear approximate lookahead.
+ greedy=true;
+ }
+ : ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} argList RPAREN! )
+ | ( options {greedy=true;} :
+ lbc:LBRACK^ {#lbc.setType(ARRAY_DECLARATOR);} RBRACK!
+ )+
+ )?
+ ;
+
+/** object instantiation.
+ * Trees are built as illustrated by the following input/tree pairs:
+ *
+ * new T()
+ *
+ * new
+ * |
+ * T -- ELIST
+ * |
+ * arg1 -- arg2 -- .. -- argn
+ *
+ * new int[]
+ *
+ * new
+ * |
+ * int -- ARRAY_DECLARATOR
+ *
+ * new int[] {1,2}
+ *
+ * new
+ * |
+ * int -- ARRAY_DECLARATOR -- ARRAY_INIT
+ * |
+ * EXPR -- EXPR
+ * | |
+ * 1 2
+ *
+ * new int[3]
+ * new
+ * |
+ * int -- ARRAY_DECLARATOR
+ * |
+ * EXPR
+ * |
+ * 3
+ *
+ * new int[1][2]
+ *
+ * new
+ * |
+ * int -- ARRAY_DECLARATOR
+ * |
+ * ARRAY_DECLARATOR -- EXPR
+ * | |
+ * EXPR 1
+ * |
+ * 2
+ *
+ */
+newExpression
+ : "new"^ type
+ ( LPAREN! argList RPAREN! (classBlock)?
+
+ //java 1.1
+ // Note: This will allow bad constructs like
+ // new int[4][][3] {exp,exp}.
+ // There needs to be a semantic check here...
+ // to make sure:
+ // a) [ expr ] and [ ] are not mixed
+ // b) [ expr ] and an init are not used together
+
+ | newArrayDeclarator (arrayInitializer)?
+ )
+ ;
+
+argList
+ : ( expressionList
+ | /*nothing*/
+ {#argList = #[ELIST,"ELIST"];}
+ )
+ ;
+
+newArrayDeclarator
+ : (
+ // CONFLICT:
+ // newExpression is a primaryExpression which can be
+ // followed by an array index reference. This is ok,
+ // as the generated code will stay in this loop as
+ // long as it sees an LBRACK (proper behavior)
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ :
+ lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);}
+ (expression)?
+ RBRACK!
+ )+
+ ;
+
+constant
+ : NUM_INT
+ | CHAR_LITERAL
+ | STRING_LITERAL
+ | NUM_FLOAT
+ | NUM_LONG
+ | NUM_DOUBLE
+ ;
+
+//----------------------------------------------------------------------------
+// The Java scanner
+//----------------------------------------------------------------------------
+class JavaLexer extends Lexer;
+
+options {
+ exportVocab=Java; // call the vocabulary "Java"
+ testLiterals=false; // don't automatically test for literals
+ k=4; // four characters of lookahead
+ charVocabulary='\u0003'..'\u7FFE';
+ // without inlining some bitset tests, couldn't do unicode;
+ // I need to make ANTLR generate smaller bitsets; see
+ // bottom of JavaLexer.java
+ codeGenBitsetTestThreshold=20;
+}
+
+// OPERATORS
+QUESTION : '?' ;
+LPAREN : '(' ;
+RPAREN : ')' ;
+LBRACK : '[' ;
+RBRACK : ']' ;
+LCURLY : '{' ;
+RCURLY : '}' ;
+COLON : ':' ;
+COMMA : ',' ;
+//DOT : '.' ;
+ASSIGN : '=' ;
+EQUAL : "==" ;
+LNOT : '!' ;
+BNOT : '~' ;
+NOT_EQUAL : "!=" ;
+DIV : '/' ;
+DIV_ASSIGN : "/=" ;
+PLUS : '+' ;
+PLUS_ASSIGN : "+=" ;
+INC : "++" ;
+MINUS : '-' ;
+MINUS_ASSIGN : "-=" ;
+DEC : "--" ;
+STAR : '*' ;
+STAR_ASSIGN : "*=" ;
+MOD : '%' ;
+MOD_ASSIGN : "%=" ;
+SR : ">>" ;
+SR_ASSIGN : ">>=" ;
+BSR : ">>>" ;
+BSR_ASSIGN : ">>>=" ;
+GE : ">=" ;
+GT : ">" ;
+SL : "<<" ;
+SL_ASSIGN : "<<=" ;
+LE : "<=" ;
+LT : '<' ;
+BXOR : '^' ;
+BXOR_ASSIGN : "^=" ;
+BOR : '|' ;
+BOR_ASSIGN : "|=" ;
+LOR : "||" ;
+BAND : '&' ;
+BAND_ASSIGN : "&=" ;
+LAND : "&&" ;
+SEMI : ';' ;
+
+
+// Whitespace -- ignored
+WS : ( ' '
+ | '\t'
+ | '\f'
+ // handle newlines
+ | ( options {generateAmbigWarnings=false;}
+ : "\r\n" // Evil DOS
+ | '\r' // Macintosh
+ | '\n' // Unix (the right way)
+ )
+ { newline(); }
+ )+
+ { _ttype = Token.SKIP; }
+ ;
+
+// Single-line comments
+SL_COMMENT
+ : "//"
+ (~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
+ {$setType(Token.SKIP); newline();}
+ ;
+
+// multiple-line comments
+ML_COMMENT
+ : "/*"
+ ( /* '\r' '\n' can be matched in one alternative or by matching
+ '\r' in one iteration and '\n' in another. I am trying to
+ handle any flavor of newline that comes in, but the language
+ that allows both "\r\n" and "\r" and "\n" to all be valid
+ newline is ambiguous. Consequently, the resulting grammar
+ must be ambiguous. I'm shutting this warning off.
+ */
+ options {
+ generateAmbigWarnings=false;
+ }
+ :
+ { LA(2)!='/' }? '*'
+ | '\r' '\n' {newline();}
+ | '\r' {newline();}
+ | '\n' {newline();}
+ | ~('*'|'\n'|'\r')
+ )*
+ "*/"
+ {$setType(Token.SKIP);}
+ ;
+
+
+// character literals
+CHAR_LITERAL
+ : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
+ ;
+
+// string literals
+STRING_LITERAL
+ : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
+ ;
+
+
+// escape sequence -- note that this is protected; it can only be called
+// from another lexer rule -- it will not ever directly return a token to
+// the parser
+// There are various ambiguities hushed in this rule. The optional
+// '0'...'9' digit matches should be matched here rather than letting
+// them go back to STRING_LITERAL to be matched. ANTLR does the
+// right thing by matching immediately; hence, it's ok to shut off
+// the FOLLOW ambig warnings.
+protected
+ESC
+ : '\\'
+ ( 'n'
+ | 'r'
+ | 't'
+ | 'b'
+ | 'f'
+ | '"'
+ | '\''
+ | '\\'
+ | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
+ | '0'..'3'
+ (
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ : '0'..'7'
+ (
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ : '0'..'7'
+ )?
+ )?
+ | '4'..'7'
+ (
+ options {
+ warnWhenFollowAmbig = false;
+ }
+ : '0'..'7'
+ )?
+ )
+ ;
+
+
+// hexadecimal digit (again, note it's protected!)
+protected
+HEX_DIGIT
+ : ('0'..'9'|'A'..'F'|'a'..'f')
+ ;
+
+
+// an identifier. Note that testLiterals is set to true! This means
+// that after we match the rule, we look in the literals table to see
+// if it's a literal or really an identifer
+IDENT
+ options {testLiterals=true;}
+ : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
+ ;
+
+
+// a numeric literal
+NUM_INT
+ {boolean isDecimal=false; Token t=null;}
+ : '.' {_ttype = DOT;}
+ ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})?
+ {
+ if (t != null && t.getText().toUpperCase().indexOf('F')>=0) {
+ _ttype = NUM_FLOAT;
+ }
+ else {
+ _ttype = NUM_DOUBLE; // assume double
+ }
+ }
+ )?
+
+ | ( '0' {isDecimal = true;} // special case for just '0'
+ ( ('x'|'X')
+ ( // hex
+ // the 'e'|'E' and float suffix stuff look
+ // like hex digits, hence the (...)+ doesn't
+ // know when to stop: ambig. ANTLR resolves
+ // it correctly by matching immediately. It
+ // is therefor ok to hush warning.
+ options {
+ warnWhenFollowAmbig=false;
+ }
+ : HEX_DIGIT
+ )+
+
+ | //float or double with leading zero
+ (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+
+
+ | ('0'..'7')+ // octal
+ )?
+ | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal
+ )
+ ( ('l'|'L') { _ttype = NUM_LONG; }
+
+ // only check to see if it's a float if looks like decimal so far
+ | {isDecimal}?
+ ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})?
+ | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})?
+ | f4:FLOAT_SUFFIX {t=f4;}
+ )
+ {
+ if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) {
+ _ttype = NUM_FLOAT;
+ }
+ else {
+ _ttype = NUM_DOUBLE; // assume double
+ }
+ }
+ )?
+ ;
+
+
+// a couple protected methods to assist in matching floating point numbers
+protected
+EXPONENT
+ : ('e'|'E') ('+'|'-')? ('0'..'9')+
+ ;
+
+
+protected
+FLOAT_SUFFIX
+ : 'f'|'F'|'d'|'D'
+ ;
+
diff --git a/src/java/com/sun/gluegen/jgram/Test.java b/src/java/com/sun/gluegen/jgram/Test.java
new file mode 100644
index 0000000..996d645
--- /dev/null
+++ b/src/java/com/sun/gluegen/jgram/Test.java
@@ -0,0 +1,132 @@
+package com.sun.gluegen.jgram;
+
+import java.util.*;
+
+import java.io.*;
+// import antlr.collections.AST;
+import antlr.collections.impl.*;
+import antlr.debug.misc.*;
+import antlr.*;
+// import java.awt.event.*;
+
+class Test {
+
+ static boolean showTree = false;
+ public static void main(String[] args) {
+ // Use a try/catch block for parser exceptions
+ try {
+ // if we have at least one command-line argument
+ if (args.length > 0 ) {
+ System.err.println("Parsing...");
+
+ // for each directory/file specified on the command line
+ for(int i=0; i< args.length;i++) {
+ if ( args[i].equals("-showtree") ) {
+ showTree = true;
+ }
+ else {
+ doFile(new File(args[i])); // parse it
+ }
+ } }
+ else
+ System.err.println("Usage: java com.sun.gluegen.jgram.Test [-showtree] "+
+ "<directory or file name>");
+ }
+ catch(Exception e) {
+ System.err.println("exception: "+e);
+ e.printStackTrace(System.err); // so we can get stack trace
+ }
+ }
+
+
+ // This method decides what action to take based on the type of
+ // file we are looking at
+ public static void doFile(File f)
+ throws Exception {
+ // If this is a directory, walk each file/dir in that directory
+ if (f.isDirectory()) {
+ String files[] = f.list();
+ for(int i=0; i < files.length; i++)
+ doFile(new File(f, files[i]));
+ }
+
+ // otherwise, if this is a java file, parse it!
+ else if ((f.getName().length()>5) &&
+ f.getName().substring(f.getName().length()-5).equals(".java")) {
+ System.err.println(" "+f.getAbsolutePath());
+ // parseFile(f.getName(), new FileInputStream(f));
+ parseFile(f.getName(), new BufferedReader(new FileReader(f)));
+ }
+ }
+
+ // Here's where we do the real work...
+ public static void parseFile(String f, Reader r)
+ throws Exception {
+ try {
+ // Create a scanner that reads from the input stream passed to us
+ JavaLexer lexer = new JavaLexer(r);
+ lexer.setFilename(f);
+
+ // Create a parser that reads from the scanner
+ JavaParser parser = new JavaParser(lexer);
+ parser.setFilename(f);
+
+ // start parsing at the compilationUnit rule
+ parser.compilationUnit();
+
+ Set set = parser.getParsedEnumNames();
+ System.out.println("Enums");
+ for(Iterator iter = set.iterator(); iter.hasNext(); ) {
+ String s = (String) iter.next();
+ System.out.println(s);
+ }
+ System.out.println("Functions");
+ set = parser.getParsedFunctionNames();
+ for(Iterator iter = set.iterator(); iter.hasNext(); ) {
+ String s = (String) iter.next();
+ System.out.println(s);
+ }
+
+ // do something with the tree
+ //doTreeAction(f, parser.getAST(), parser.getTokenNames());
+ }
+ catch (Exception e) {
+ System.err.println("parser exception: "+e);
+ e.printStackTrace(); // so we can get stack trace
+ }
+ }
+
+ /*
+ public static void doTreeAction(String f, AST t, String[] tokenNames) {
+ if ( t==null ) return;
+ if ( showTree ) {
+ ((CommonAST)t).setVerboseStringConversion(true, tokenNames);
+ ASTFactory factory = new ASTFactory();
+ AST r = factory.create(0,"AST ROOT");
+ r.setFirstChild(t);
+ final ASTFrame frame = new ASTFrame("Java AST", r);
+ frame.setVisible(true);
+ frame.addWindowListener(
+ new WindowAdapter() {
+ public void windowClosing (WindowEvent e) {
+ frame.setVisible(false); // hide the Frame
+ frame.dispose();
+ System.exit(0);
+ }
+ }
+ );
+ // System.out.println(t.toStringList());
+ }
+ JavaTreeParser tparse = new JavaTreeParser();
+ try {
+ tparse.compilationUnit(t);
+ // System.out.println("successful walk of result AST for "+f);
+ }
+ catch (RecognitionException e) {
+ System.err.println(e.getMessage());
+ e.printStackTrace();
+ }
+
+ } */
+}
+