PROGRAM 1
%{ /**************************************************************************/ /* author: anthony f. ortiz */ /* program: scanner.l */ /* class: cs 6110 theory and design of compilers */ /* date: october 14, 1998 */ /* description: this program implements the scanner and symbol table */ /* part of a compiler. */ /**************************************************************************/ #include "y.tab.h" #include int hash_it (char * word); int add_symtable (char * word); int lookup_symtable (char * word); void print_symtable (); %} %% "and" { printf (" "); return (AND); } "array" { printf (" "); return (ARRAY); } "boolean" { printf (" "); return (BOOL); } "break" { printf (" "); return (BREAK); } "continue" { printf (" "); return (CONT); } "case" { printf (" "); return (CASE); } "declare" { printf (" "); return (DECLARE); } "div" { yylval.ival = DIV; printf (" "); return (MULTOP); } "do" { printf (" "); return (DO); } "else" { printf (" "); return (ELSE); } "end" { printf (" "); return (END); } "function" { printf (" "); return (FUNCTION); } "if" { printf (" "); return (IF); } "integer" { printf (" "); return (INT); } "mod" { yylval.ival = MOD; printf (" "); return (MULTOP); } "not" { printf (" "); return (NOT); } "of" { printf (" "); return (OF); } "or" { printf (" "); return (OR); } "procedure" { printf (" "); return (PROCEDURE); } "program" { printf ("\nTOKENS: "); return (PROGRAM); } "read" { printf (" "); return (READ); } "real" { printf (" "); return (REAL); } "ref" { printf (" "); return (REF); } "return" { printf (" "); return (RETURN); } "returns" { printf (" "); return (RETURNS); } "then" { printf (" "); return (THEN); } "while" { printf (" "); return (WHILE); } "write" { printf (" "); return (WRITE); } "true" { yylval.ival = 1; printf ("", yylval.ival); return (BCONST); } "false" { yylval.ival = 0; printf ("", yylval.ival); return (FALSE); } [ \n\t]+ {/* white space */} "//".*"\n" {/* comments */} ":=" { printf (" "); return (ASSGN); } ":" { printf (" "); return (COL); } "," { printf (" "); return (COMMA); } "=" { yylval.ival = EQ; printf (" "); return (RELOP); } "/" { yylval.ival = FDIV; printf (" "); return (MULTOP); } ">=" { yylval.ival = GEQ; printf (" "); return (RELOP); } ">" { yylval.ival = GR; printf (" "); return (RELOP); } [A-Za-z][A-Za-z0-9_]* { int state; state = lookup_symtable (yytext); if (state == 0) { add_symtable (yytext); } printf (" ", yytext); return (IDENT); } [0-9]+ { yylval.ival = atoi (yytext); printf (" ", yylval.ival); return (INTCONST); } "[" { printf (" "); return (LBRACK); } "{" { printf (""); return (LBRACE); } "<=" { yylval.ival = LEQ; printf (" "); return (RELOP); } "<" { yylval.ival = LESS; printf (" "); return (RELOP); } "(" { printf (" "); return (LP); } "-" { yylval.ival = MINUS; printf (" "); return (ADDOP); } "!=" { yylval.ival = NEQ; printf (" "); return (RELOP); } "+" { yylval.ival = PLUS; printf (" "); return (ADDOP); } ".." { printf (" "); return (RANGE); } "]" { printf (" "); return (RBRACK); } "}" { printf (" "); return (RBRACE); } ")" { printf (" "); return (RP); } [0-9]+[/.][0-9]+ { yylval.rval = (float) (atof (yytext)); printf ("", yylval.rval); return (REALCONST); } ";" { printf (" "); return (SC); } "*" { yylval.ival = TIMES; printf (" "); return (MULTOP); } %% /* my main function is in the parse.y file. */ /* define a linked list of words and types. */ struct word { char * word_name; struct word * next; }; struct word * word_list [211]; /* the hash table with open chaining */ extern void * malloc (); /* hash word using formula in aho, sethi, and ullman. */ int hash_it (char * word) { char * p; unsigned h = 0; unsigned g; for (p = word; * p != '\0'; p = p + 1) { h = (h << 4) + (* p); if (g = h&0xf0000000) { h = h ^ (g >> 24); h = h ^ g; } } return h % 211; } /* add identifier to symbol table. */ int add_symtable (char * word) { struct word * wp [211]; int h = hash_it (word); if (lookup_symtable (word) != 0) { printf ("word not defined"); return 0; } /* word not there, allocate a new entry and link it on the list. */ wp [h] = (struct word *) malloc (sizeof (struct word)); wp [h] -> next = word_list [h]; /* have to copy the word itself as well. */ wp [h] -> word_name = (char *) malloc (strlen (word) + 1); strcpy (wp [h] -> word_name, word); word_list [h] = wp [h]; return 1; /* it worked. */ } /* lookup identifier in symbol table, return 0 if not found, 1 if found. */ int lookup_symtable (char * word) { struct word * wp [211]; int h = hash_it (word); wp [h] = word_list [h]; /* search down the list looking for the word. */ for (; wp [h]; wp [h] = wp [h] -> next) { if (strcmp (wp [h] -> word_name, word) == 0) return 1; } return 0; /* not found. */ } /* print identifiers in symbol table. */ void print_symtable () { struct word * wp [211]; int h; printf ("\n\nSymbol Table\n\n"); /* print list. */ for (h = 0; h < 211; h++) { wp [h] = word_list [h]; for (; wp [h]; wp [h] = wp [h] -> next) { printf ("%s\n", wp [h] -> word_name); } } printf ("\n"); } /* filename: parser.y */ /* i have made the following changes to this file: i have removed the macro at the beginning. i have removed all the procedures, including the main routine. i have replaced the procedures with my own main and error routines. i have changed one of the grammer rules, procedure grammer rule needed an ident in it. i have changed one of the tokens from dotdot to range, wasn't consistent. i have changed the union from st_type to struct * word. */ /* cs 6110-01 fall 1998 */ /* this is a first approximation to a grammar for the project. * the actual grammar may change before the parsing stage is * begun. * comments will be c++-style, i.e. begin with "//" and go to end-of-line. */ %{ #include #include "defs.h" #include #include #include %} %token AND ARRAY BOOL /* "boolean" */ BREAK CONT /* "continue" */ CASE %token DECLARE DIV DO ELSE END FUNCTION IF INT /* "integer" */ %token MOD NOT OF OR PROCEDURE PROGRAM READ REAL REF RETURN RETURNS THEN %token WHILE WRITE TRUE FALSE %token ADDOP %token ASSGN /* ":=" */ %token BCONST /* boolean constant */ %token COL /* ":" */ %token COMMA /* "," */ %token EQ /* "=" */ %token FDIV /* "/" */ %token GEQ /* ">=" */ %token GR /* ">" */ %token IDENT /* identifier */ %token INTCONST /* integer constant */ %token LBRACK /* "[" */ %token LBRACE /* "{" */ %token LEQ /* "<=" */ %token LESS /* "<" */ %token LP /* "(" */ %token MINUS /* "-" */ %token MULTOP %token NEQ /* "!=" */ %token PLUS /* "+" */ %token RANGE /* ".." */ %token RBRACK /* "]" */ %token RBRACE /* "}" */ %token RELOP %token RP /* ")" */ %token REALCONST /* real constant */ %token SC /* ";" */ %token TIMES /* "*" */ /* yystype structure */ /* here, "st_type" is *my* name for my symbol_table record type. later in the project, more items can profitably be added to this union. */ %union {int ival; float rval; struct word * pval;} %left OR %left AND %nonassoc RELOP %left ADDOP %left MULTOP %right NOT %right UMINUS %% start : program; program : PROGRAM IDENT SC vardeclar procdeclist LBRACE statlist RBRACE ; vardeclar : DECLARE vardeclist END SC | /* empty */ ; vardeclist : decl vardeclist | /* empty */ ; decl : identlist COL simpletype SC | identlist COL arraytype SC ; identlist : identlist COMMA IDENT | IDENT ; simpletype : INT | REAL | BOOL ; arraytype : ARRAY LBRACK INTCONST RANGE INTCONST RBRACK OF simpletype ; procdeclist : procdecl procdeclist | /* empty */ ; procdecl : procheader vardeclar procdeclist LBRACE statlist RBRACE ; procheader : PROCEDURE IDENT params SC | FUNCTION IDENT params RETURNS simpletype SC ; params : LP paramlist RP | /* empty */ ; paramlist : parameter | paramlist COMMA parameter ; parameter : IDENT COL simpletype | REF IDENT COL simpletype ; block : LBRACE vardeclar statlist RBRACE ; statlist : statlist statement | statement ; statement : variable ASSGN expression SC | ifstatement | WHILE expression DO statlist END | CASE expression OF caselist END SC | RETURN expression SC | BREAK SC | CONT SC | READ variable SC | WRITE expression SC | procedurecall SC | block | SC /* empty */ ; ifstatement : IF expression THEN statlist elsepart ; elsepart : ELSE statlist END SC | END SC /* empty */ ; caselist : casebranch caselist | /* empty */ ; casebranch : labellist COL statement ; labellist : INTCONST COMMA labellist | INTCONST ; procedurecall : IDENT | callwithparameters ; callwithparameters : IDENT LP actuals RP ; actuals : actuals COMMA expression | expression ; variable : IDENT LBRACK expression RBRACK | IDENT | callwithparameters ; expression : expression OR expression | expression AND expression | expression RELOP expression | expression ADDOP expression | expression MULTOP expression | ADDOP expression %prec UMINUS | NOT expression | variable | BCONST | INTCONST | REALCONST | LP expression RP ; %% extern FILE * yyin; /* read from a file specified by the command line and parse. */ main (int argc, char ** argv) { if (argc > 1) { FILE *file; file = fopen (argv [1], "r"); if (!file) { fprintf (stderr, "could not open %s\n", argv [1]); } yyin = file; } yyparse (); print_symtable (); } /* print a error message. */ yyerror (s) char * s; { printf ("%s\n", s); } /* defs.h: definitions file */ typedef enum {undef_t, int_t, real_t, bool_t, funct_t, proc_t, program_t} type_t; typedef enum {false, true} bool; typedef enum {add, sub, mult, idiv, neg, less, equ, fadd, fsub, fmult, fdiv, fneg, fless, fequ, itor, mov, inxld, inxst, movim, movad, movabs, bnot, band, bor, bequ, gto, jmp, iread, rread, iwrite, rwrite, call, ret, halt} op_t; typedef struct {op_t _op; int _opd1; int _opd2; int _dest;} quad_t; typedef union {quad_t quad; int ival; float rval; bool bval;} mem_t; /* case information */ struct case_info { int s_loc; int e_loc; int label; struct case_info * next; }; /* do while start and begin info */ struct do_info { int start; int middle; int end; int brk; int cont; }; /* actuals */ struct actual_info { int loc; struct actual_info * next; }; /* information about what is stored at an address in memory */ struct info { type_t type; int first; int index; }; /* symbol table record */ struct word { char * word_name; int sc_level; type_t type; int dim; int low; int high; int first; struct p_struct * params; int ret_type; struct word * next; }; /* define a linked list of variables. */ struct v_struct { char * name; struct v_struct * next; }; /* define a linked list of parameters. */ struct p_struct { type_t type; int p_loc; int isref; struct p_struct * next; }; struct info i; #define PC mem [0].ival #define SP mem [1].ival #define op quad._op #define opd1 quad._opd1 #define opd2 quad._opd2 #define dest quad._dest #define MAXMEM 8092 #define AND 257 #define ARRAY 258 #define BOOL 259 #define BREAK 260 #define CONT 261 #define CASE 262 #define DECLARE 263 #define DIV 264 #define DO 265 #define ELSE 266 #define END 267 #define FUNCTION 268 #define IF 269 #define INT 270 #define MOD 271 #define NOT 272 #define OF 273 #define OR 274 #define PROCEDURE 275 #define PROGRAM 276 #define READ 277 #define REAL 278 #define REF 279 #define RETURN 280 #define RETURNS 281 #define THEN 282 #define WHILE 283 #define WRITE 284 #define TRUE 285 #define FALSE 286 #define ADDOP 287 #define ASSGN 288 #define BCONST 289 #define COL 290 #define COMMA 291 #define EQ 292 #define FDIV 293 #define GEQ 294 #define GE 295 #define IDENT 296 #define INTCONST 297 #define LBRACK 298 #define DBRACE 299 #define LEQ 300 #define LESS 301 #define LP 302 #define MINUS 303 #define MULTOP 304 #define NEQ 305 #define PLUS 306 #define RANGE 307 #define RBRACK 308 #define RBRACE 309 #define RELOP 310 #define RP 311 #define REALCONST 312 #define SC 313 #define TIMES 314 /* data file: test1.dat */ program test; declare x, y, z : integer; u, v : real; end; { read x; read y; write x; write y; z := 5; u := 3.4; v := 7.8; z := x + y; u := u-v; write z; write u; if x > y then u := -7; else u:= 4.3; end; while (x > 0) do x := x -1; end; if (x > y) then z := 42; end; write x; write z; write u; write v; } /* outfile: prog1.out */ TOKENS: Symbol Table u v x y z test
BACK TO CS6110 PAGE.