/* * lex.c -- Generate all of the lexical type files: parser.dlg tokens.h * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. 
*
 * ANTLR 1.33
 * Terence Parr
 * Parr Research Corporation
 * with Purdue University and AHPCRC, University of Minnesota
 * 1989-2001
 */

/* NOTE(review): the two #include directives below carry no header name in
 * this copy of the file; the names appear to have been lost and must be
 * restored from a pristine source before the file can be built.
 */
#include
#include
/* MR1 */
/* MR1 10-Apr-97 MR1 Replace use of __STDC__ with __USE_PROTOS */
/* MR1 */
#include "pcctscfg.h"
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"

/* Message handed to (*zzerr)() by the zzerraction() that genLexDescr emits */
#define DLGErrorString "invalid token"

/* Generate a complete lexical description of the lexemes found in the grammar
 *
 * Opens OutMetaName(DlgFileName) for writing and, as far as the visible
 * code shows, emits in this order:
 *   - a leading <<...>> action holding a banner comment and a C prelude
 *     (#define and #include lines plus a definition of zzerraction()),
 *   - one <<%%lexaction ...>> action for each element of LexActions,
 *   - a <<%%parserclass ...>> line naming CurrentClassName when GenCC is set,
 *   - one <<%%lexprefix ...>> action for each element of LexPrefixActions,
 *   - one <<%%lexmember ...>> action for each element of LexMemberActions,
 *   - the regular expression rules through dumpLexClasses(), or a warning
 *     when ExprOrder is NULL,
 *   - a closing %% line, after which the file is closed.
 * Every action list is walked starting at its ->next field, so the first
 * node of each list is never printed.
 *
 * NOTE(review): this copy of the function is damaged.  Text is missing
 * between the start of the file-name loop that follows the Generated-from
 * banner line and the LL_K #define, so the loop header, the rest of the
 * banner and the opening of the block that is closed right after the
 * zzskip() line are not visible here.  Restore them from a pristine source.
 */
void
#ifdef __USE_PROTOS
genLexDescr( void )
#else
genLexDescr( )
#endif
{
	ListNode *p;
	FILE *dlgFile = fopen(OutMetaName(DlgFileName), "w");
	require(dlgFile!=NULL, eMsg1("genLexFile: cannot open %s", OutMetaName(DlgFileName)) );
#ifdef SPECIAL_FOPEN
	special_fopen_actions(OutMetaName(DlgFileName)); /* MR1 */
#endif
	fprintf(dlgFile, "<<\n");
	fprintf(dlgFile, "/* %s -- DLG Description of scanner\n", DlgFileName);
	fprintf(dlgFile, " *\n");
	fprintf(dlgFile, " * Generated from:");
	{int i; for (i=0; i 1 ) fprintf(dlgFile, "#define LL_K %d\n", OutputLL_k);
		if ( DemandLookahead ) fprintf(dlgFile, "#define DEMAND_LOOK\n");
		if (TraceGen) {
			fprintf(dlgFile,"#ifndef zzTRACE_RULES\n"); /* MR20 */
			fprintf(dlgFile,"#define zzTRACE_RULES\n"); /* MR20 */
			fprintf(dlgFile,"#endif\n"); /* MR22 */
		};
		fprintf(dlgFile, "#include \"antlr.h\"\n");
		if ( GenAST ) {
			fprintf(dlgFile, "#include \"ast.h\"\n");
		}
		if ( UserDefdTokens )
			fprintf(dlgFile, "#include %s\n", UserTokenDefsFile);
		/* still need this one as it has the func prototypes */
		fprintf(dlgFile, "#include \"%s\"\n", DefFileName);
		fprintf(dlgFile, "#include \"dlgdef.h\"\n");
		fprintf(dlgFile, "LOOKAHEAD\n");
		fprintf(dlgFile, "\n");
		/* emit zzerraction() in both prototype and K&R form */
		fprintf(dlgFile, "void\n");
		fprintf(dlgFile, "#ifdef __USE_PROTOS\n");
		fprintf(dlgFile, "zzerraction(void)\n");
		fprintf(dlgFile, "#else\n");
		fprintf(dlgFile, "zzerraction()\n");
		fprintf(dlgFile, "#endif\n");
		fprintf(dlgFile, "{\n");
		fprintf(dlgFile, "\t(*zzerr)(\"%s\");\n", DLGErrorString);
		fprintf(dlgFile, "\tzzadvance();\n");
		fprintf(dlgFile, "\tzzskip();\n");
		fprintf(dlgFile, "}\n");
	}
	/* close the leading action opened by the first line written */
	fprintf(dlgFile, ">>\n\n");

	/* dump all actions */

/* MR1 */
/* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */
/* MR1 via <<%%lexmember ....>> & <<%%lexprefix ...>> */
/* MR1 */
	if (LexActions != NULL) {
		for (p = LexActions->next; p!=NULL; p=p->next)
		{
/* MR1 */		fprintf(dlgFile, "<<%%%%lexaction\n");
			dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 );
			fprintf(dlgFile, ">>\n\n");
		}
	};

/* MR1 */	if (GenCC) {
/* MR1 */		fprintf(dlgFile,"<<%%%%parserclass %s>>\n\n",CurrentClassName);
/* MR1 */	};

/* MR1 */	if (LexPrefixActions != NULL) {
/* MR1 */		for (p = LexPrefixActions->next; p!=NULL; p=p->next)
/* MR1 */		{
/* MR1 */			fprintf(dlgFile, "<<%%%%lexprefix\n");
/* MR1 */			dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 );
/* MR1 */			fprintf(dlgFile, ">>\n\n");
/* MR1 */		}
/* MR1 */	};

/* MR1 */	if (LexMemberActions != NULL) {
/* MR1 */		for (p = LexMemberActions->next; p!=NULL; p=p->next)
/* MR1 */		{
/* MR1 */			fprintf(dlgFile, "<<%%%%lexmember\n");
/* MR1 */			dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 );
/* MR1 */			fprintf(dlgFile, ">>\n\n");
/* MR1 */		}
/* MR1 */	};

	/* dump all regular expression rules/actions (skip sentinel node) */
	if ( ExprOrder == NULL ) {
		warnNoFL("no regular expressions found in grammar");
	}
	else dumpLexClasses(dlgFile);
	/* the format yields a line holding two percent signs */
	fprintf(dlgFile, "%%%%\n");
	fclose( dlgFile );
}

/* For each lexical class, scan ExprOrder looking for expressions
 * in that lexical class.  Print out only those that match.
 * Each element of the ExprOrder list has both an expr and an lclass
 * field.
*/
/* For every expression that belongs to class i the visible code selects the
 * lexical mode with lexmode(i), looks the expression up in Texpr, skips
 * entries whose token is EpToken, and writes the pattern followed by a
 * <<...>> action.  Without GenCC the action begins with an NLA assignment;
 * with GenCC it ends with a return of the token.  The action attached to
 * the term, if any, is dumped in between.  The token is printed by name
 * when TokenString() knows one and as a number otherwise.
 *
 * NOTE(review): the header of the nested loops is damaged in this copy of
 * the file (text is missing between the loop variable i and next); restore
 * it from a pristine source before building.
 */
void
#ifdef __USE_PROTOS
dumpLexClasses( FILE *dlgFile )
#else
dumpLexClasses( dlgFile )
FILE *dlgFile;
#endif
{
	int i;
	TermEntry *t;
	ListNode *p;
	Expr *q;

	for (i=0; inext; p!=NULL; p=p->next)
		{
			q = (Expr *) p->elem;
			if ( q->lclass != i ) continue;
			lexmode(i);
			t = (TermEntry *) hash_get(Texpr, q->expr);
			require(t!=NULL, eMsg1("genLexDescr: rexpr %s not in hash table",q->expr) );
			if ( t->token == EpToken ) continue;
			fprintf(dlgFile, "%s\n\t<<\n", StripQuotes(q->expr));
			/* replace " killed by StripQuotes() */
			q->expr[ strlen(q->expr) ] = '"';
			if ( !GenCC ) {
				if ( TokenString(t->token) != NULL )
					fprintf(dlgFile, "\t\tNLA = %s;\n", TokenString(t->token));
				else
					fprintf(dlgFile, "\t\tNLA = %d;\n", t->token);
			}
			if ( t->action != NULL ) dumpAction( t->action, dlgFile, 2,-1,0,1 );
			if ( GenCC ) {
				if ( TokenString(t->token) != NULL )
					fprintf(dlgFile, "\t\treturn %s;\n", TokenString(t->token));
				else
					fprintf(dlgFile, "\t\treturn (ANTLRTokenType)%d;\n", t->token);
			}
			fprintf(dlgFile, "\t>>\n\n");
		}
	}
}

/* Strip the leading path (if any) from a filename */
/* Returns a pointer into fileName: one past the last occurrence of the first
 * character of DirectorySymbol, or fileName itself when that character does
 * not occur.  Nothing is copied, so the result aliases the argument.
 */
char *
#ifdef __USE_PROTOS
StripPath( char *fileName )
#else
StripPath( fileName )
char *fileName;
#endif
{
	char *p;
	static char dirSym[2] = DirectorySymbol;

	if(NULL != (p = strrchr(fileName, dirSym[0])))
		p++;
	else
		p = fileName;

	return(p);
}

/* Generate a list of #defines && list of struct definitions for
 * aggregate retv's */
/* Writes the token definition file OutMetaName(DefFileName) through the
 * file-level DefFile stream.  Nothing is written when GenCC is set together
 * with UserTokenDefsFile, or when MR_Inhibit_Tokens_h_Gen is set.  The
 * output is wrapped in an #ifndef/#define/#endif guard whose symbol is
 * StripPath(gate_symbol(DefFileName)).  Tokens that are not class names are
 * written as name=number enumerators when GenCC is set and as #define lines
 * otherwise; without GenCC the rule prototypes follow via
 * GenRulePrototypes().
 *
 * NOTE(review): this copy of the function is damaged.  Text is missing
 * after the start of the file-name loop and inside the loop over lexical
 * classes, so the declarations of p and first, the token loop header and
 * the opening of the enum are not visible here.
 * NOTE(review): no fclose(DefFile) is visible in this function -- confirm
 * where the stream is closed.
 */
void
#ifdef __USE_PROTOS
genDefFile( void )
#else
genDefFile( )
#endif
{
	int i;

	/* If C++ mode and #tokdef used, then don't need anything in here since
	 * C++ puts all definitions in the class file name.
	 */
	if ( GenCC && UserTokenDefsFile ) return;
	if ( MR_Inhibit_Tokens_h_Gen) return;

	DefFile = fopen(OutMetaName(DefFileName), "w");
	require(DefFile!=NULL, eMsg1("genDefFile: cannot open %s", OutMetaName(DefFileName)) );
#ifdef SPECIAL_FOPEN
	special_fopen_actions(OutMetaName(DefFileName)); /* MR1 */
#endif
	fprintf(DefFile, "#ifndef %s\n", StripPath(gate_symbol(DefFileName)));
	fprintf(DefFile, "#define %s\n", StripPath(gate_symbol(DefFileName)));
	fprintf(DefFile, "/* %s -- List of labelled tokens and stuff\n", DefFileName);
	fprintf(DefFile, " *\n");
	fprintf(DefFile, " * Generated from:");
	for (i=0; i1 ) { int j;
				/* look in all lexclasses for the reg expr */

/* MR10 Derek Pappas */
/* MR10 A #tokclass doesn't have associated regular expressions */
/* MR10 so don't warn user about it's omission */
				p = (TermEntry *) hash_get(Tname, TokenString(i));

				if (p != NULL && ! p->classname) {
					for (j=0; j=NumLexClasses ) {
						warnNoFL(eMsg1("token label has no associated rexpr: %s",TokenString(i)));
					}
				};
			}
			require((p=(TermEntry *)hash_get(Tname, TokenString(i))) != NULL,
					"token not in sym tab when it should be");
			if ( !p->classname )
			{
				if ( GenCC ) {
					/* enumerators are comma separated; first marks the initial one */
					if ( !first ) fprintf(DefFile, ",\n");
					first = 0;
					fprintf(DefFile, "\t%s=%d", TokenString(i), i);
				}
				else
					fprintf(DefFile, "#define %s %d\n", TokenString(i), i);
			}
		}
	}
/* MR1 */
/* MR1 10-Apr-97 133MR1 Prevent use of varying sizes of integer */
/* MR1 for the enum ANTLRTokenType */
/* MR1 */
	if ( GenCC ) {
/* MR1 */		if ( !first ) fprintf(DefFile, ",\n");
/* MR14 */		fprintf(DefFile, "\tDLGminToken=0");
/* MR1 */		fprintf(DefFile, ",\n\tDLGmaxToken=9999};\n");
/* MR1 */	};
	}

	if ( !GenCC ) GenRulePrototypes(DefFile, SynDiag);

	/* closes the include guard opened at the top of the file */
	fprintf(DefFile, "\n#endif\n");
}

/* Write the symbol remap file.  The visible code acts only when ParserName
 * differs from DefaultParserName; it then opens OutMetaName(RemapFileName)
 * for writing and starts a banner comment in it.
 *
 * NOTE(review): this copy is damaged.  Text is missing after the start of
 * the file-name loop; the remainder of this function is not visible, and
 * the statements that follow the gap (which print p->rname and advance p
 * through p->p2) appear to be the tail of a separate routine that walks the
 * rule list.  Restore both from a pristine source.
 */
void
#ifdef __USE_PROTOS
GenRemapFile( void )
#else
GenRemapFile( )
#endif
{
	if ( strcmp(ParserName, DefaultParserName)!=0 )
	{
		FILE *f;
		int i;

		f = fopen(OutMetaName(RemapFileName), "w");
		require(f!=NULL, eMsg1("GenRemapFile: cannot open %s", OutMetaName(RemapFileName)) );
#ifdef SPECIAL_FOPEN
		special_fopen_actions(OutMetaName(RemapFileName)); /* MR1 */
#endif
		fprintf(f, "/* %s -- List of symbols to remap\n", RemapFileName);
		fprintf(f, " *\n");
		fprintf(f, " * Generated from:");
		for (i=0; irname, ParserName, p->rname);
		p = (Junction *)p->p2;
	}
}

/* Generate a bunch of #defines that rename all standard symbols to be
 * "ParserName_symbol".  The list of standard symbols to change is in
 * globals.c.
 *
 * After a heading comment, one #define line mapping the symbol to
 * ParserName, an underscore and the symbol is written to f for every entry
 * of StandardSymbols up to its NULL terminator.
 */
void
#ifdef __USE_PROTOS
GenPredefinedSymbolRedefs( FILE *f )
#else
GenPredefinedSymbolRedefs( f )
FILE *f;
#endif
{
	char **p;

	fprintf(f, "\n/* rename PCCTS-supplied symbols to be 'ParserName_symbol' */\n");
	for (p = &StandardSymbols[0]; *p!=NULL; p++)
	{
		fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p);
	}
}

/* Generate a bunch of #defines that rename all AST symbols to be
 * "ParserName_symbol".  The list of AST symbols to change is in
 * globals.c.
 *
 * Same output shape as GenPredefinedSymbolRedefs(), but driven by the
 * NULL-terminated ASTSymbols table.
 */
void
#ifdef __USE_PROTOS
GenASTSymbolRedefs( FILE *f )
#else
GenASTSymbolRedefs( f )
FILE *f;
#endif
{
	char **p;

	fprintf(f, "\n/* rename PCCTS-supplied AST symbols to be 'ParserName_symbol' */\n");
	for (p = &ASTSymbols[0]; *p!=NULL; p++)
	{
		fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p);
	}
}

/* redefine all sets generated by ANTLR; WARNING: 'zzerr', 'setwd' must match
 * use in bits.c (DumpSetWd() etc...)
 *
 * Writes one #define per set to f: setwd1 .. setwd<wordnum> are mapped to
 * ParserName followed by _setwd and the number, and zzerr1 .. zzerr<esetnum>
 * are mapped to ParserName followed by _err and the number (note that the
 * zz prefix is dropped in the replacement name).
 */
void
#ifdef __USE_PROTOS
GenSetRedefs( FILE *f )
#else
GenSetRedefs( f )
FILE *f;
#endif
{
	int i;

	for (i=1; i<=wordnum; i++)
	{
		fprintf(f, "#define setwd%d %s_setwd%d\n", i, ParserName, i);
	}
	for (i=1; i<=esetnum; i++)
	{
		fprintf(f, "#define zzerr%d %s_err%d\n", i, ParserName, i);
	}
}

/* Find all return types/parameters that require structs and def
 * all rules with ret types.
 *
 * This is for the declaration, not the definition.
*/ void #ifdef __USE_PROTOS GenRulePrototypes( FILE *f, Junction *p ) #else GenRulePrototypes( f, p ) FILE *f; Junction *p; #endif { int i; i = 1; while ( p!=NULL ) { if ( p->ret != NULL ) { /* MR23 */ if ( hasMultipleOperands(p->ret) ) { DumpRetValStruct(f, p->ret, i); } fprintf(f, "\n#ifdef __USE_PROTOS\n"); /* MR23 */ if ( hasMultipleOperands(p->ret) ) { fprintf(f, "extern struct _rv%d", i); } else { fprintf(f, "extern "); DumpType(p->ret, f); } fprintf(f, " %s%s(", RulePrefix, p->rname); DumpANSIFunctionArgDef(f,p,1 /* emit initializers ? */); fprintf(f, ";\n"); fprintf(f, "#else\n"); /* MR23 */ if ( hasMultipleOperands(p->ret) ) { fprintf(f, "extern struct _rv%d", i); } else { fprintf(f, "extern "); DumpType(p->ret, f); } fprintf(f, " %s%s();\n", RulePrefix, p->rname); fprintf(f, "#endif\n"); } else { fprintf(f, "\n#ifdef __USE_PROTOS\n"); fprintf(f, "void %s%s(", RulePrefix, p->rname); DumpANSIFunctionArgDef(f,p, 1 /* emit initializers ? */ ); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) { fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":""); } if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } else fprintf(f, "void"); fprintf(f, ");\n"); #endif fprintf(f, "#else\n"); fprintf(f, "extern void %s%s();\n", RulePrefix, p->rname); fprintf(f, "#endif\n"); } i++; p = (Junction *)p->p2; } } /* Define all rules in the class.h file; generate any required * struct definitions first, however. 
*/ void #ifdef __USE_PROTOS GenRuleMemberDeclarationsForCC( FILE *f, Junction *q ) #else GenRuleMemberDeclarationsForCC( f, q ) FILE *f; Junction *q; #endif { Junction *p = q; int i; fprintf(f, "private:\n"); /* Dump dflt handler declaration */ fprintf(f, "\tvoid zzdflthandlers( int _signal, int *_retsignal );\n\n"); fprintf(f, "public:\n"); /* Dump return value structs */ i = 1; while ( p!=NULL ) { if ( p->ret != NULL ) { /* MR23 */ if ( hasMultipleOperands(p->ret) ) { DumpRetValStruct(f, p->ret, i); } } i++; p = (Junction *)p->p2; } /* Dump member func defs && CONSTRUCTOR */ fprintf(f, "\t%s(ANTLRTokenBuffer *input);\n", CurrentClassName); /* fprintf(f, "\t%s(ANTLRTokenBuffer *input, ANTLRTokenType eof);\n", CurrentClassName); */ i = 1; p = q; while ( p!=NULL ) { if ( p->ret != NULL ) { /* MR23 */ if ( hasMultipleOperands(p->ret) ) { fprintf(f, "\tstruct _rv%d", i); } else { fprintf(f, "\t"); DumpType(p->ret, f); } fprintf(f, " %s%s(",RulePrefix,p->rname); DumpANSIFunctionArgDef(f,p, 1 /* emit initializers ? */ ); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } fprintf(f, ");\n"); #endif } else { fprintf(f, "\tvoid %s%s(",RulePrefix,p->rname); DumpANSIFunctionArgDef(f,p, 1 /* emit initializers ? */); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } fprintf(f, ");\n"); #endif } i++; p = (Junction *)p->p2; } } /* Given a list of ANSI-style parameter declarations, print out a * comma-separated list of the symbols (w/o types). * Basically, we look for a comma, then work backwards until start of * the symbol name. Then print it out until 1st non-alnum char. Now, * move on to next parameter. 
* */ /* MR5 Jan Mikkelsen 26-May-97 - added initialComma parameter */ void #ifdef __USE_PROTOS DumpListOfParmNames(char *pdecl, FILE *output, int initialComma) /* MR5 */ #else DumpListOfParmNames(pdecl, output, initialComma) /* MR5 */ char *pdecl; /* MR5 */ FILE *output; /* MR5 */ int initialComma; /* MR5 */ #endif { int firstTime = 1, done = 0; require(output!=NULL, "DumpListOfParmNames: NULL parm"); if ( pdecl == NULL ) return; while ( !done ) { if ( !firstTime || initialComma ) putc(',', output); /* MR5 */ done = DumpNextNameInDef(&pdecl, output); firstTime = 0; } } /* given a list of parameters or return values, dump the next * name to output. Return 1 if last one just printed, 0 if more to go. */ /* MR23 Total rewrite */ int #ifdef __USE_PROTOS DumpNextNameInDef( char **q, FILE *output ) #else DumpNextNameInDef( q, output ) char **q; FILE *output; #endif { char *p; char *t; char *pDataType; char *pSymbol; char *pEqualSign; char *pValue; char *pSeparator; int nest = 0; p = endFormal(*q, &pDataType, &pSymbol, &pEqualSign, &pValue, &pSeparator, &nest); /* MR26 Handle rule arguments such as: IIR_Bool (IIR_Decl::*constraint)() For this we need to strip off anything which follows the symbol. */ /* MR26 */ t = pSymbol; /* MR26 */ if (t != NULL) { /* MR26 */ for (t = pSymbol; *t != 0; t++) { /* MR26 */ if (! (isalpha(*t) || isdigit(*t) || *t == '_' || *t == '$')) break; /* MR26 */ } /* MR26 */ } /* MR26 */ fprintf(output, "%s", strBetween(pSymbol, t, pSeparator)); *q = p; return (*pSeparator == 0); } /* Given a list of ANSI-style parameter declarations, dump K&R-style * declarations, one per line for each parameter. Basically, convert * comma to semi-colon, newline. 
*/ void #ifdef __USE_PROTOS DumpOldStyleParms( char *pdecl, FILE *output ) #else DumpOldStyleParms( pdecl, output ) char *pdecl; FILE *output; #endif { require(output!=NULL, "DumpOldStyleParms: NULL parm"); if ( pdecl == NULL ) return; while ( *pdecl != '\0' ) { if ( *pdecl == ',' ) { pdecl++; putc(';', output); putc('\n', output); while ( *pdecl==' ' || *pdecl=='\t' || *pdecl=='\n' ) pdecl++; } else {putc(*pdecl, output); pdecl++;} } putc(';', output); putc('\n', output); } /* Take in a type definition (type + symbol) and print out type only */ /* MR23 Total rewrite */ void #ifdef __USE_PROTOS DumpType( char *s, FILE *f ) #else DumpType( s, f ) char *s; FILE *f; #endif { char *p; char *pDataType; char *pSymbol; char *pEqualSign; char *pValue; char *pSeparator; int nest = 0; require(s!=NULL, "DumpType: invalid type string"); p = endFormal(s, &pDataType, &pSymbol, &pEqualSign, &pValue, &pSeparator, &nest); fprintf(f, "%s", strBetween(pDataType, pSymbol, pSeparator)); } /* check to see if string e is a word in string s */ int #ifdef __USE_PROTOS strmember( char *s, char *e ) #else strmember( s, e ) char *s; char *e; #endif { register char *p; require(s!=NULL&&e!=NULL, "strmember: NULL string"); if ( *e=='\0' ) return 1; /* empty string is always member */ do { while ( *s!='\0' && !isalnum(*s) && *s!='_' ) ++s; p = e; while ( *p!='\0' && *p==*s ) {p++; s++;} if ( *p=='\0' ) { if ( *s=='\0' ) return 1; if ( !isalnum (*s) && *s != '_' ) return 1; } while ( isalnum(*s) || *s == '_' ) ++s; } while ( *s!='\0' ); return 0; } #if 0 /* MR23 Replaced by hasMultipleOperands() */ int #ifdef __USE_PROTOS HasComma( char *s ) #else HasComma( s ) char *s; #endif { while (*s!='\0') if ( *s++ == ',' ) return 1; return 0; } #endif /* MR23 Total rewrite */ void #ifdef __USE_PROTOS DumpRetValStruct( FILE *f, char *ret, int i ) #else DumpRetValStruct( f, ret, i ) FILE *f; char *ret; int i; #endif { char *p = ret; char *pDataType; char *pSymbol; char *pEqualSign; char *pValue; char 
*pSeparator; int nest = 0; fprintf(f, "\nstruct _rv%d {\n", i); while (*p != 0 && nest == 0) { p = endFormal(p, &pDataType, &pSymbol, &pEqualSign, &pValue, &pSeparator, &nest); fprintf(f,"\t"); fprintf(f, "%s", strBetween(pDataType, pSymbol, pSeparator)); fprintf(f," "); fprintf(f, "%s", strBetween(pSymbol, pEqualSign, pSeparator)); fprintf(f,";\n"); } fprintf(f,"};\n"); } /* given "s" yield s -- DESTRUCTIVE (we modify s if starts with " else return s) */ char * #ifdef __USE_PROTOS StripQuotes( char *s ) #else StripQuotes( s ) char *s; #endif { if ( *s == '"' ) { s[ strlen(s)-1 ] = '\0'; /* remove last quote */ return( s+1 ); /* return address past initial quote */ } return( s ); }