1: /*
2: * Copyright (c) 1980 Regents of the University of California.
3: * All rights reserved. The Berkeley software License Agreement
4: * specifies the terms and conditions for redistribution.
5: *
6: * @(#)0.h 5.2 (Berkeley) 6/20/85
7: */
8:
9: #define DEBUG
10: #define CONSETS
11: #define CHAR
12: #define STATIC
13: #define hp21mx 0
14:
15: #include <stdio.h>
16: #include <sys/types.h>
17: #undef roundup
18:
19: typedef enum {FALSE, TRUE} bool;
20:
21: /*
22: * Option flags
23: *
24: * The following options are recognized in the text of the program
25: * and also on the command line:
26: *
27: * b block buffer the file output
28: *
29: * i make a listing of the procedures and functions in
30: * the following include files
31: *
32: * l make a listing of the program
33: *
34: * n place each include file on a new page with a header
35: *
36: * p disable post mortem and statement limit counting
37: *
38: * t disable run-time tests
39: *
40: * u card image mode; only first 72 chars of input count
41: *
42: * w suppress special diagnostic warnings
43: *
44: * z generate counters for an execution profile
45: */
46: #ifdef DEBUG
47: bool fulltrace, errtrace, testtrace, yyunique;
48: #endif DEBUG
49:
50: /*
51: * Each option has a stack of 17 option values, with opts giving
52: * the current, top value, and optstk the value beneath it.
53: * One refers to option `l' as, e.g., opt('l') in the text for clarity.
54: */
55: char opts[ 'z' - 'A' + 1];
56: short optstk[ 'z' - 'A' + 1];
57:
58: #define opt(c) opts[c-'A']
59:
60: /*
61: * Monflg is set when we are generating
62: * a pxp profile. this is set by the -z command line option.
63: */
64: bool monflg;
65:
66: /*
67: * profflag is set when we are generating a prof profile.
68: * this is set by the -p command line option.
69: */
70: #ifdef PC
71: bool profflag;
72: #endif
73:
74:
75: /*
76: * NOTES ON THE DYNAMIC NATURE OF THE DATA STRUCTURES
77: *
78: * Pi uses expandable tables for
79: * its namelist (symbol table), string table
80: * hash table, and parse tree space. The following
81: * definitions specify the size of the increments
82: * for these items in fundamental units so that
83: * each uses approximately 1024 bytes.
84: */
85:
86: #define STRINC 1024 /* string space increment */
87: #define TRINC 1024 /* tree space increment */
88: #define HASHINC 509 /* hash table size in words, each increment */
89: #define NLINC 56 /* namelist increment size in nl structs */
90:
91: /*
92: * The initial sizes of the structures.
93: * These should be large enough to compile
94: * an "average" sized program so as to minimize
95: * storage requests.
96: * On a small system or and 11/34 or 11/40
97: * these numbers can be trimmed to make the
98: * compiler smaller.
99: */
100: #define ITREE 2000
101: #define INL 200
102: #define IHASH 509
103:
104: /*
105: * The following limits on hash and tree tables currently
106: * allow approximately 1200 symbols and 20k words of tree
107: * space. The fundamental limit of 64k total data space
108: * should be exceeded well before these are full.
109: */
110: /*
111: * TABLE_MULTIPLIER is for uniformly increasing the sizes of the tables
112: */
113: #ifdef ADDR32
114: #define TABLE_MULTIPLIER 8
115: #endif ADDR32
116: #ifdef ADDR16
117: #define TABLE_MULTIPLIER 1
118: #endif ADDR16
119: #define MAXHASH (4 * TABLE_MULTIPLIER)
120: #define MAXNL (12 * TABLE_MULTIPLIER)
121: #define MAXTREE (40 * TABLE_MULTIPLIER)
122: /*
123: * MAXDEPTH is the depth of the parse stack.
124: * STACK_MULTIPLIER is for increasing its size.
125: */
126: #ifdef ADDR32
127: #define STACK_MULTIPLIER 8
128: #endif ADDR32
129: #ifdef ADDR16
130: #define STACK_MULTIPLIER 1
131: #endif ADDR16
132: #define MAXDEPTH ( 150 * STACK_MULTIPLIER )
133:
134: /*
135: * ERROR RELATED DEFINITIONS
136: */
137:
138: /*
139: * Exit statuses to pexit
140: *
141: * AOK
142: * ERRS Compilation errors inhibit obj productin
143: * NOSTART Errors before we ever got started
144: * DIED We ran out of memory or some such
145: */
146: #define AOK 0
147: #define ERRS 1
148: #define NOSTART 2
149: #define DIED 3
150:
151: bool Recovery;
152:
153: #define eholdnl() Eholdnl = TRUE
154: #define nocascade() Enocascade = TRUE
155:
156: bool Eholdnl, Enocascade;
157:
158:
159: /*
160: * The flag eflg is set whenever we have a hard error.
161: * The character in errpfx will precede the next error message.
162: * When cgenflg is set code generation is suppressed.
163: * This happens whenver we have an error (i.e. if eflg is set)
164: * and when we are walking the tree to determine types only.
165: */
166: bool eflg;
167: char errpfx;
168:
169: #define setpfx(x) errpfx = x
170:
171: #define standard() setpfx('s')
172: #define warning() setpfx('w')
173: #define recovered() setpfx('e')
174: #define continuation() setpfx(' ')
175:
176: int cgenflg;
177:
178:
179: /*
180: * The flag syneflg is used to suppress the diagnostics of the form
181: * E 10 a, defined in someprocedure, is neither used nor set
182: * when there were syntax errors in "someprocedure".
183: * In this case, it is likely that these warinings would be spurious.
184: */
185: bool syneflg;
186:
187: /*
188: * The compiler keeps its error messages in a file.
189: * The variable efil is the unit number on which
190: * this file is open for reading of error message text.
191: * Similarly, the file ofil is the unit of the file
192: * "obj" where we write the interpreter code.
193: */
194: short efil;
195:
196: #ifdef OBJ
197: short ofil;
198:
199: short obuf[518];
200: #endif
201:
202: bool Enoline;
203: #define elineoff() Enoline = TRUE
204: #define elineon() Enoline = FALSE
205:
206:
207: /*
208: * SYMBOL TABLE STRUCTURE DEFINITIONS
209: *
210: * The symbol table is henceforth referred to as the "namelist".
211: * It consists of a number of structures of the form "nl" below.
212: * These are contained in a number of segments of the symbol
213: * table which are dynamically allocated as needed.
214: * The major namelist manipulation routines are contained in the
215: * file "nl.c".
216: *
217: * The major components of a namelist entry are the "symbol", giving
218: * a pointer into the string table for the string associated with this
219: * entry and the "class" which tells which of the (currently 19)
220: * possible types of structure this is.
221: *
222: * Many of the classes use the "type" field for a pointer to the type
223: * which the entry has.
224: *
225: * Other pieces of information in more than one class include the block
226: * in which the symbol is defined, flags indicating whether the symbol
227: * has been used and whether it has been assigned to, etc.
228: *
229: * A more complete discussion of the features of the namelist is impossible
230: * here as it would be too voluminous. Refer to the "PI 1.0 Implementation
231: * Notes" for more details.
232: */
233:
234: /*
235: * The basic namelist structure.
236: * There is a union of data types defining the stored information
237: * as pointers, integers, longs, or a double.
238: *
239: * The array disptab defines the hash header for the symbol table.
240: * Symbols are hashed based on the low 6 bits of their pointer into
241: * the string table; see the routines in the file "lookup.c" and also "fdec.c"
242: * especially "funcend".
243: */
244: extern int pnumcnt;
245:
246: struct nl {
247: char *symbol;
248: char info[4];
249: struct nl *type;
250: struct nl *chain, *nl_next;
251: union {
252: struct nl *un_ptr[5];
253: int un_value[5];
254: long un_range[2];
255: double un_real;
256: struct nl *un_nptr[5]; /* Points to conformant array bounds */
257: } nl_un;
258: # ifdef PTREE
259: pPointer inTree;
260: # endif PTREE
261: };
262:
263: #define class info[0]
264: #define nl_flags info[1]
265: #define nl_block info[1]
266: #define info[2]
267: #define align_info info[3]
268:
269: #define range nl_un.un_range
270: #define value nl_un.un_value
271: #define ptr nl_un.un_ptr
272: #define real nl_un.un_real
273: #define nptr nl_un.un_nptr
274:
275: extern struct nl *nlp, *disptab[077+1], *Fp;
276: extern struct nl nl[INL];
277:
278:
279: /*
280: * NL FLAGS BITS
281: *
282: * Definitions of the usage of the bits in
283: * the nl_flags byte. Note that the low 5 bits of the
284: * byte are the "nl_block" and that some classes make use
285: * of this byte as a "width".
286: *
287: * The only non-obvious bit definition here is "NFILES"
288: * which records whether a structure contains any files.
289: * Such structures are not allowed to be dynamically allocated.
290: */
291:
292: #define BLOCKNO( flag ) ( flag & 037 )
293: #define NLFLAGS( flag ) ( flag &~ 037 )
294:
295: #define NUSED 0100
296: #define NMOD 0040
297: #define NFORWD 0200
298: #define NFILES 0200
299: #ifdef PC
300: #define NEXTERN 0001 /* flag used to mark external funcs and procs */
301: #define NLOCAL 0002 /* variable is a local */
302: #define NPARAM 0004 /* variable is a parameter */
303: #define NGLOBAL 0010 /* variable is a global */
304: #define NREGVAR 0020 /* or'ed in if variable is in a register */
305: #define NNLOCAL 0040 /* named local variable, not used in symbol table */
306: #endif PC
307:
308: /*
309: * used to mark value[ NL_FORV ] for loop variables
310: */
311: #define FORVAR 1
312:
313: /*
314: * Definition of the commonly used "value" fields.
315: * The most important one is NL_OFFS which gives
316: * the offset of a variable in its stack mark.
317: */
318: #define NL_OFFS 0
319:
320: #define NL_CNTR 1
321: #define NL_NLSTRT 2
322: #define NL_LINENO 3
323: #define NL_FVAR 3
324: #define NL_ENTLOC 4 /* FUNC, PROC - entry point */
325: #define NL_FCHAIN 4 /* FFUNC, FPROC - ptr to formals */
326:
327: #define NL_GOLEV 2
328: #define NL_GOLINE 3
329: #define NL_FORV 1
330:
331: /*
332: * nlp -> nl_un.un_ptr[] subscripts for records
333: * NL_FIELDLIST the chain of fixed fields of a record, in order.
334: * the fields are also chained through ptr[NL_FIELDLIST].
335: * this does not include the tag, or fields of variants.
336: * NL_VARNT pointer to the variants of a record,
337: * these are then chained through the .chain field.
338: * NL_VTOREC pointer from a VARNT to the RECORD that is the variant.
339: * NL_TAG pointer from a RECORD to the tagfield
340: * if there are any variants.
341: * align_info the alignment of a RECORD is in info[3].
342: */
343: #define NL_FIELDLIST 1
344: #define NL_VARNT 2
345: #define NL_VTOREC 2
346: #define NL_TAG 3
347: /* and align_info is info[3]. #defined above */
348:
349: #define NL_ELABEL 4 /* SCAL - ptr to definition of enums */
350:
351: /*
352: * For BADUSE nl structures, NL_KINDS is a bit vector
353: * indicating the kinds of illegal usages complained about
354: * so far. For kind of bad use "kind", "1 << kind" is set.
355: * The low bit is reserved as ISUNDEF to indicate whether
356: * this identifier is totally undefined.
357: */
358: #define NL_KINDS 0
359:
360: #define ISUNDEF 1
361:
362: /*
363: * variables come in three flavors: globals, parameters, locals;
364: * they can also hide in registers, but that's a different flag
365: */
366: #define PARAMVAR 1
367: #define LOCALVAR 2
368: #define GLOBALVAR 3
369: #define NAMEDLOCALVAR 4
370:
371: /*
372: * NAMELIST CLASSES
373: *
374: * The following are the namelist classes.
375: * Different classes make use of the value fields
376: * of the namelist in different ways.
377: *
378: * The namelist should be redesigned by providing
379: * a number of structure definitions with one corresponding
380: * to each namelist class, ala a variant record in Pascal.
381: */
382: #define BADUSE 0
383: #define CONST 1
384: #define TYPE 2
385: #define VAR 3
386: #define ARRAY 4
387: #define PTRFILE 5
388: #define RECORD 6
389: #define FIELD 7
390: #define PROC 8
391: #define FUNC 9
392: #define FVAR 10
393: #define REF 11
394: #define PTR 12
395: #define FILET 13
396: #define SET 14
397: #define RANGE 15
398: #define LABEL 16
399: #define WITHPTR 17
400: #define SCAL 18
401: #define STR 19
402: #define PROG 20
403: #define IMPROPER 21
404: #define VARNT 22
405: #define FPROC 23
406: #define FFUNC 24
407: #define CRANGE 25
408:
409: /*
410: * Clnames points to an array of names for the
411: * namelist classes.
412: */
413: char **clnames;
414:
415: /*
416: * PRE-DEFINED NAMELIST OFFSETS
417: *
418: * The following are the namelist offsets for the
419: * primitive types. The ones which are negative
420: * don't actually exist, but are generated and tested
421: * internally. These definitions are sensitive to the
422: * initializations in nl.c.
423: */
424: #define TFIRST -7
425: #define TFILE -7
426: #define TREC -6
427: #define TARY -5
428: #define TSCAL -4
429: #define TPTR -3
430: #define TSET -2
431: #define TSTR -1
432: #define NIL 0
433: #define TBOOL 1
434: #define TCHAR 2
435: #define TINT 3
436: #define TDOUBLE 4
437: #define TNIL 5
438: #define T1INT 6
439: #define T2INT 7
440: #define T4INT 8
441: #define T1CHAR 9
442: #define T1BOOL 10
443: #define T8REAL 11
444: #define TLAST 11
445:
446: /*
447: * SEMANTIC DEFINITIONS
448: */
449:
450: /*
451: * NOCON and SAWCON are flags in the tree telling whether
452: * a constant set is part of an expression.
453: * these are no longer used,
454: * since we now do constant sets at compile time.
455: */
456: #define NOCON 0
457: #define SAWCON 1
458:
459: /*
460: * The variable cbn gives the current block number,
461: * the variable bn is set as a side effect of a call to
462: * lookup, and is the block number of the variable which
463: * was found.
464: */
465: short bn, cbn;
466:
467: /*
468: * The variable line is the current semantic
469: * line and is set in stat.c from the numbers
470: * embedded in statement type tree nodes.
471: */
472: short line;
473:
474: /*
475: * The size of the display
476: * which defines the maximum nesting
477: * of procedures and functions allowed.
478: * Because of the flags in the current namelist
479: * this must be no greater than 32.
480: */
481: #define DSPLYSZ 20
482:
483: /*
484: * the following structure records whether a level declares
485: * any variables which are (or contain) files.
486: * this so that the runtime routines for file cleanup can be invoked.
487: */
488: bool dfiles[ DSPLYSZ ];
489:
490: /*
491: * Structure recording information about a constant
492: * declaration. It is actually the return value from
493: * the routine "gconst", but since C doesn't support
494: * record valued functions, this is more convenient.
495: */
496: struct {
497: struct nl *ctype;
498: short cival;
499: double crval;
500: char *cpval; /* note used to be int * */
501: } con;
502:
503: /*
504: * The set structure records the lower bound
505: * and upper bound with the lower bound normalized
506: * to zero when working with a set. It is set by
507: * the routine setran in var.c.
508: */
509: struct {
510: short lwrb, uprbp;
511: } set;
512:
513: /*
514: * structures of this kind are filled in by precset and used by postcset
515: * to indicate things about constant sets.
516: */
517: struct csetstr {
518: struct nl *csettype;
519: long paircnt;
520: long singcnt;
521: bool comptime;
522: };
523: /*
524: * The following flags are passed on calls to lvalue
525: * to indicate how the reference is to affect the usage
526: * information for the variable being referenced.
527: * MOD is used to set the NMOD flag in the namelist
528: * entry for the variable, ASGN permits diagnostics
529: * to be formed when a for variable is assigned to in
530: * the range of the loop.
531: */
532: #define NOFLAGS 0
533: #define MOD 01
534: #define ASGN 02
535: #define NOUSE 04
536:
537: /*
538: * the following flags are passed to lvalue and rvalue
539: * to tell them whether an lvalue or rvalue is required.
540: * the semantics checking is done according to the function called,
541: * but for pc, lvalue may put out an rvalue by indirecting afterwards,
542: * and rvalue may stop short of putting out the indirection.
543: */
544: #define LREQ 01
545: #define RREQ 02
546:
547: double MAXINT;
548: double MININT;
549:
550: /*
551: * Variables for generation of profile information.
552: * Monflg is set when we want to generate a profile.
553: * Gocnt record the total number of goto's and
554: * cnts records the current counter for generating
555: * COUNT operators.
556: */
557: short gocnt;
558: short cnts;
559:
560: /*
561: * Most routines call "incompat" rather than asking "!compat"
562: * for historical reasons.
563: */
564: #define incompat !compat
565:
566: /*
567: * Parts records which declaration parts have been seen.
568: * The grammar allows the "label" "const" "type" "var" and routine
569: * parts to be repeated and to be in any order, so that
570: * they can be detected semantically to give better
571: * error diagnostics.
572: *
573: * The flag NONLOCALVAR indicates that a non-local var has actually
574: * been used hence the display must be saved; NONLOCALGOTO indicates
575: * that a non-local goto has been done hence that a setjmp must be done.
576: */
577: int parts[ DSPLYSZ ];
578:
579: #define LPRT 0x0001
580: #define CPRT 0x0002
581: #define TPRT 0x0004
582: #define VPRT 0x0008
583: #define RPRT 0x0010
584:
585: #define NONLOCALVAR 0x0020
586: #define NONLOCALGOTO 0x0040
587:
588: /*
589: * Flags for the "you used / instead of div" diagnostic
590: */
591: bool divchk;
592: bool divflg;
593:
594: bool errcnt[DSPLYSZ];
595:
596: /*
597: * Forechain links those types which are
598: * ^ sometype
599: * so that they can be evaluated later, permitting
600: * circular, recursive list structures to be defined.
601: */
602: struct nl *forechain;
603:
604: /*
605: * Withlist links all the records which are currently
606: * opened scopes because of with statements.
607: */
608: struct nl *withlist;
609:
610: struct nl *intset;
611: struct nl *input, *output;
612: struct nl *program;
613:
614: /* progseen flag used by PC to determine if
615: * a routine segment is being compiled (and
616: * therefore no program statement seen)
617: */
618: bool progseen;
619:
620:
621: /*
622: * STRUCTURED STATEMENT GOTO CHECKING
623: *
624: * The variable level keeps track of the current
625: * "structured statement level" when processing the statement
626: * body of blocks. This is used in the detection of goto's into
627: * structured statements in a block.
628: *
629: * Each label's namelist entry contains two pieces of information
630: * related to this check. The first `NL_GOLEV' either contains
631: * the level at which the label was declared, `NOTYET' if the label
632: * has not yet been declared, or `DEAD' if the label is dead, i.e.
633: * if we have exited the level in which the label was defined.
634: *
635: * When we discover a "goto" statement, if the label has not
636: * been defined yet, then we record the current level and the current line
637: * for a later error check. If the label has been already become "DEAD"
638: * then a reference to it is an error. Now the compiler maintains,
639: * for each block, a linked list of the labels headed by "gotos[bn]".
640: * When we exit a structured level, we perform the routine
641: * ungoto in stat.c. It notices labels whose definition levels have been
642: * exited and makes them be dead. For labels which have not yet been
643: * defined, ungoto will maintain NL_GOLEV as the minimum structured level
644: * since the first usage of the label. It is not hard to see that the label
645: * must eventually be declared at this level or an outer level to this
646: * one or a goto into a structured statement will exist.
647: */
648: short level;
649: struct nl *gotos[DSPLYSZ];
650:
651: #define NOTYET 10000
652: #define DEAD 10000
653:
654: /*
655: * Noreach is true when the next statement will
656: * be unreachable unless something happens along
657: * (like exiting a looping construct) to save
658: * the day.
659: */
660: bool noreach;
661:
662: /*
663: * UNDEFINED VARIABLE REFERENCE STRUCTURES
664: */
665: struct udinfo {
666: int ud_line;
667: struct udinfo *ud_next;
668: char nullch;
669: };
670:
671: /*
672: * CODE GENERATION DEFINITIONS
673: */
674:
675: /*
676: * NSTAND is or'ed onto the abstract machine opcode
677: * for non-standard built-in procedures and functions.
678: */
679: #define NSTAND 0400
680:
681: #define codeon() cgenflg++
682: #define codeoff() --cgenflg
683: #define CGENNING ( cgenflg >= 0 )
684:
685: /*
686: * Codeline is the last lino output in the code generator.
687: * It used to be used to suppress LINO operators but no
688: * more since we now count statements.
689: * Lc is the intepreter code location counter.
690: *
691: short codeline;
692: */
693: #ifdef OBJ
694: char *lc;
695: #endif
696:
697:
698: /*
699: * Routines which need types
700: * other than "integer" to be
701: * assumed by the compiler.
702: */
703: double atof();
704: long lwidth();
705: long leven();
706: long aryconst();
707: long a8tol();
708: long roundup();
709: struct nl *tmpalloc();
710: struct nl *lookup();
711: double atof();
712: int *hash();
713: char *alloc();
714: int *pcalloc();
715: char *savestr();
716: char *esavestr();
717: char *parnam();
718: char *malloc();
719: char *getlab();
720: char *getnext();
721: char *skipbl();
722: char *nameof();
723: char *pstrcpy();
724: char *myctime();
725: char *putlab();
726: bool fcompat();
727: bool constval();
728: bool precset();
729: bool nilfnil();
730: struct nl *funccod();
731: struct nl *pcfunccod();
732: struct nl *lookup1();
733: struct nl *hdefnl();
734: struct nl *defnl();
735: struct nl *flvalue();
736: struct nl *plist();
737: struct nl *enter();
738: struct nl *nlcopy();
739: struct nl *tyrec();
740: struct nl *tyary();
741: struct nl *tyrang();
742: struct nl *tyscal();
743: struct nl *deffld();
744: struct nl *stklval();
745: struct nl *scalar();
746: struct nl *gen();
747: struct nl *stkrval();
748: struct nl *funcext();
749: struct nl *funchdr();
750: struct nl *funcbody();
751: struct nl *yybaduse();
752: struct nl *stackRV();
753: struct nl *defvnt();
754: struct nl *tyrec1();
755: struct nl *reclook();
756: struct nl *asgnop1();
757: struct nl *pcasgconf();
758: struct nl *gtype();
759: struct nl *call();
760: struct nl *lvalue();
761: struct nl *pclvalue();
762: struct nl *rvalue();
763: struct nl *cset();
764: struct nl *tycrang();
765: struct tnode *newlist();
766: struct tnode *addlist();
767: struct tnode *fixlist();
768: struct tnode *setupvar();
769: struct tnode *setuptyrec();
770: struct tnode *setupfield();
771: struct tnode *tree();
772: struct tnode *tree1();
773: struct tnode *tree2();
774: struct tnode *tree3();
775: struct tnode *tree4();
776: struct tnode *tree5();
777:
778: /*
779: * type cast NIL to keep lint happy (which is not so bad)
780: */
781: #define NLNIL ( (struct nl *) NIL )
782: #define TR_NIL ( (struct tnode *) NIL)
783:
784: /*
785: * Funny structures to use
786: * pointers in wild and wooly ways
787: */
788: struct cstruct{
789: char pchar;
790: };
791: struct {
792: short pint;
793: short pint2;
794: };
795: struct lstruct {
796: long plong;
797: };
798: struct {
799: double pdouble;
800: };
801:
802: #define OCT 1
803: #define HEX 2
804:
805: /*
806: * MAIN PROGRAM VARIABLES, MISCELLANY
807: */
808:
809: /*
810: * Variables forming a data base referencing
811: * the command line arguments with the "i" option, e.g.
812: * in "pi -i scanner.i compiler.p".
813: */
814: char **pflist;
815: short pflstc;
816: short pfcnt;
817:
818: char *filename; /* current source file name */
819: long tvec;
820: extern char *snark; /* SNARK */
821: extern char *classes[ ]; /* maps namelist classes to string names */
822:
823: #define derror error
824:
825: #ifdef PC
826:
827: /*
828: * the current function number, for [ lines
829: */
830: int ftnno;
831:
832: /*
833: * the pc output stream
834: */
835: FILE *pcstream;
836:
837: #endif PC