/* SMALL C COMPILER - (Part 1)

   Original version published in Dr. Dobb's Journal of Computing
   Calisthenics and Orthodontia, May 1980, No. 45, by Ron Cain.

   Mods for CP/M and Z80 mnemonics John Hill March 1984 - Oct 1985

   Some of the original routines and global variables have been renamed
   either to shorten them to 7 characters or less or to avoid clashes
   with Z80 mnemonics.

   Version 1.5 had hex numbers starting with '$' allowed.
   (Routine by P.N.Mercer)

   Version 1.6 includes for loops, switch/case and also argc and argv
   based on routines by Mike Bernson (CUG Vol 9) and do-while from
   Glen Fisher's version for the 8086 in SIG/M Vol 149.
   The number routines now support hex numbers starting with 0X or 0x.
   \\,\n,\l,\b,\f,\t are supported as character values.
   Optional file names in command line.
   Compilation summary to the screen.
   Error messages to screen with pauses.
   String pool increased to 4k.

   Version 1.7 recognises: printf,fprintf,sprintf,scanf,fscanf,
   sscanf as 'special' functions and causes an additional argument
   to be passed to allow variable numbers of arguments to be used.
   Optional <,>," allowed around file names.
*/
#asm
START: ORG 100H
 JP ccinit ;Go and set up stack and get command line args if any
#endasm
#define VERSION "Small-C Compiler for Z80 - Version 1.7 (October 1985)"
/* ASCII codes for carriage return and line feed */
#define cr 13
#define lf 10
#define NULL 0
#define ERROR -1
#define FALSE 0
#define TRUE 1
#define ERRMAX 40 /* Max no of errors before aborting compilation */

/*Parameters of symbol table*/
/* symtab holds fixed-size entries of symsiz bytes. The global area starts
   at symtab and spans numglbs entries; the local area starts one entry
   beyond endglb and its limit endloc is one entry short of the table end. */
#define symsiz 14
#define numglbs 300
#define symtbsz 5040
#define strtglb symtab
#define endglb strtglb+numglbs*symsiz
#define strtloc endglb+symsiz
#define endloc symtab+symtbsz-symsiz
/* size of the value and label arrays declared in doswtch */
#define SW_MAX 254

/*Symbol table format*/
/* Byte offsets of the fields inside one entry. The offset field is two
   bytes wide, stored low byte first (see addglb and addloc). */
#define name 0
#define ident 9
#define type 10
#define storage 11
#define offset 12

/*Symbol name size*/
/* namesiz is the buffer size used for a name, namemax the number of
   characters compared by findglb and findloc */
#define namesiz 9
#define namemax 8

/*Possible values of entries in symbol table*/
/*ident*/
#define variabl 1
#define array 2
#define pointer 3
#define fnction 4
/*type*/
#define cchar 1
#define cint 2
/*storage*/
#define statik 1
#define stkloc 2

/*while statement queue parameters*/
/* The queue wq is an array of ints; one entry is wqsiz ints wide and the
   names below index the slots of an entry:
     wqsym   saved locptr            wqsp    saved sptr
     wqloop  label used by docont    wqlab   label used by dobreak
     wqlab2  extra label used by dodo
     wqend, wqbody  extra labels used by dofor */
#define wqtabsz 120
#define wqsiz 7
#define wqmax wq+wqtabsz-wqsiz
#define wqsym 0
#define wqsp 1
#define wqloop 2
#define wqlab 3
#define wqlab2 4
#define wqend 5
#define wqbody 6

/*literal pool*/
#define litabsz 4000
#define litmax litabsz-1

/*input line*/
#define linesiz 100
#define linemax linesiz-1
#define mpmax linemax

/*define (macro) pool*/
#define macqsiz 1000
#define macmax macqsiz-1

/*statement types*/
/* values stored in lastst and returned by stment */
#define stif 1
#define stwhile 2
#define stfor 3
#define stswtch 4
#define stretrn 5
#define stbreak 6
#define stcont 7
#define stasm 8
#define stexp 9

/*globals used by compiler*/
char symtab[symtbsz];/*symbol table*/
char *glbptr,*locptr;/*pointers to next entry*/
int wq[wqtabsz];/*while queue and pointer*/
int *wqptr;
char litq[litabsz];/*literal (string) pool and pointer*/
int litptr;
char macq[macqsiz];/*define pool and pointer*/
int macptr;
char line[linesiz];/*parsing and temporary define buffer with pointers*/
char mline[linesiz];
int lptr,mptr;
int nxtlab,/*next label no. and label assigned to literal pool*/
    litlab,
    sptr,/*relative sp and function arg sp*/
    argstk,
    ncmp,/*no. of open compound statements*/
    errcnt,/*error count*/
    errstop,/*stop on error*/
    eof,/*non zero after final input*/
    input,/*i/o channels of input,output and #include files*/
    output,
    input2,
    glbflag,/*non-zero if globals to be defined*/
    ctext,/*non-zero if C to be included*/
    cmode,/*non-zero when parsing C - zero for assembler*/
    clargs, /*Flag =1 if command line args */
    lastst;/*type of last executed statement*/
char *currfn; /*ptr to symtab entry for current function */
char quote[2];/*string holding a single double-quote character*/
char *cptr;/*general working pointers*/
char ipfname[20],opfname[20]; /*File names*/
int *iptr;

/* Start of compiler */
/* main - compiler entry point.
   argc/argv: command line as delivered by the start-up code.
   With no argument (argc==1) the options are obtained through ask().
   With one or two arguments the input name is argv[1] with .C appended
   and the output name is argv[2] (or argv[1] when only one argument is
   given) with .ZSM appended. More than two arguments is reported as an
   error and the interactive path is taken instead.
   NOTE(review): the argument text is copied into the 20-byte ipfname and
   opfname buffers with no length check. */
main(argc,argv) int argc,argv[];
{ char *ptr,*ptr2;
    glbptr=strtglb;/*initialise various pointers*/
    locptr=strtloc;
    wqptr=wq;
    /* one chained assignment clears every counter, flag and channel */
    macptr=
    litptr=
    sptr=
    errcnt= /*no errors,open files,open statements*/
    eof=
    errstop= /* Do not stop after errors*/
    input =
    input2=
    output=
    ncmp=
    lastst=
    quote[1]= /* quote + 0*/
    0;
    if(argc==1)clargs=0;
    else
    { clargs=1;
        /* input file name: first argument plus the .C extension */
        ptr=argv[1];
        ptr2=ipfname;
        while(*ptr)*ptr2++ = *ptr++;
        *ptr2++ = '.';
        *ptr2++ ='C';
        *ptr2=0;
        /* output file name: second argument if present, else the first */
        if(argc==3)ptr=argv[2];else ptr=argv[1];
        ptr2=opfname;
        while(*ptr) *ptr2++ = *ptr++;
        *ptr2++ = '.';
        *ptr2++ = 'Z';
        *ptr2++ = 'S';*ptr2++ ='M';
        *ptr2=0;
    }
    /* NOTE(review): the message literal below is broken across two lines
       exactly as received; presumably it was one line originally. */
    if(argc>3){clargs=0; crlf();puts("Command line error. 
Lets do it the hard way."); crlf();}
    quote[0]='"';
    currfn=NULL;
    cmode=1;/*set to process C */
    if(clargs==0) ask();/*options*/
    else { errstop=1;ctext=1;glbflag=1;nxtlab=0; pl(VERSION);crlf();litlab=getlabl();}
    openout();/*open files*/
    openin();
    header();/*header for o/p file*/
    parse();/*process all input*/
    dmplits();/* dump literals and global variables*/
    dmpglbs();
    errsmry();/*o/p error summary and close down*/
    trailer();
    closout();
}

/* parse - top-level loop, repeated until eof is set.
   A leading char or int starts a global declaration, the three
   directives are handed to doasm, doincl and addmac, and anything else
   is taken to be a function definition. */
parse()
{ while (eof==0)
    { if(amatch("char",4)){ declglb(cchar); ns(); }
      else if(amatch("int",3)){ declglb(cint); ns(); }
      else if(match("#asm"))doasm();
      else if(match("#include"))doincl();
      else if(match("#define"))addmac();
      else newfunc();
      blanks();
    }
}

/*dump strings with DEFB pseudo-ops*/
/* Does nothing when the literal pool is empty; otherwise announces the
   dump on the console and emits the pool label litlab. */
dmplits()
{ int j,k;
    if(litptr==0) return;
    crlf();puts("Dumping literals");crlf();
    prlabel(litlab);
    col();
    k=0;
    /* NOTE(review): the loop below is incomplete as received - the text
       between k and =litptr appears to have been lost in extraction.
       Restore it from a clean copy before building. */
    while (k=litptr)) { nl(); break; }
    outbyte(',');
    }
    }
}

/*dump global variables*/
/* NOTE(review): everything after the start of the while condition is
   missing as received. The statements that follow it look like the tail
   of the argument-declaration routine that newfunc calls as getarg -
   presumably the rest of dmpglbs and the head of getarg were lost. */
dmpglbs()
{ int j;
    if(glbflag==0)return;/*skip if not wanted*/
    crlf();puts("Dumping globals");crlf();
    cptr=strtglb;
    while(cptr>8;
    }
    else error("Expecting argument name");
    }
    else illname();
    argstk=argstk-2;
    if(endst()) return;
    if(match(",")==0) error("Comma expected");
    }
}

/* newfunc - compile one function definition.
   Reports the current line on the console, reads the function name and
   enters it in the global table. A name that is already global but not a
   function, or a function whose offset field already holds fnction, is
   reported through mltidef. The name is then emitted followed by col()
   and nl(), the local table is reset and the argument names are entered
   as locals, argstk growing by 2 for each one. The function continues on
   the following source line. */
newfunc()
{ char n[namesiz],*ptr;
    int argtop;
    /* 9 is the ASCII tab: indent the message while input2 is open */
    if(input2)putchar(9);
    puts("Compiling C function: ");
    puts(line);crlf();
    if(symname(n)==0)
    { error("Illegal function name or declaration");
      kill();
      return;
    }
    if(ptr=findglb(n))
    { if(ptr[ident]!=fnction){ mltidef(n); }
      else{ if(ptr[offset]==fnction) mltidef(n); else ptr[offset]=fnction; }
    }
    else addglb(n,fnction,cint,fnction);
    if(match("(")==0) error("Missing opening parenthesis");
    outstr(n);
    col();
    nl();
    locptr=strtloc;
    argstk=0;
    /* collect the argument names up to the closing parenthesis */
    while(match(")")==0){
        if(symname(n)){
            if(findloc(n)) mltidef(n);
            else{ addloc(n,0,0,argstk); argstk=argstk+2; }
        }
        else{ error("Illegal argument name"); junk(); }
        blanks();
        if(streq(&line[lptr],")")==0){
            if(match(",")==0) error("Comma expected in parameter list");
        }
        if(endst()) break;
    }
    argtop=argstk;
    while(argstk){
if(amatch("char",4)){ getarg(cchar,argtop); ns(); continue; } if(amatch("int",3)){ getarg(cint,argtop); ns(); continue; } error("Wrong type or number of arguments"); break; } sptr=0; if(stment()!=stretrn) { modstk(0); retfn(); } sptr=0; locptr=strtloc; } stment() { if((ch()==0)&(eof)) return; else if(amatch("char",4)) { declloc(cchar); ns(); } else if(amatch("int",3)) { declloc(cint); ns(); } else if(match("{"))cmpound(); else if(amatch("if",2)) { doif(); lastst=stif; } else if(amatch("do",2)) { dodo(); lastst=stwhile; } else if(amatch("while",5)) { dowhile(); lastst=stwhile; } else if(amatch("for",3)) { dofor(); lastst=stfor; } else if(amatch("switch",6)) { doswtch(); lastst=stswtch; } else if(amatch("return",6)) { doretrn(); ns(); lastst=stretrn; } else if(amatch("break",5)) { dobreak(); ns(); lastst=stbreak; } else if(amatch("continue",8)) { docont(); ns(); lastst=stcont; } else if(match(";")); else if(match("#asm")) { doasm(); lastst=stasm; } else{ expresn(); ns(); lastst=stexp; } return lastst; } ns() { if(match(";")==0)error("Missing semicolon"); } cmpound() { ++ncmp; while(match("}")==0) if(eof) return; else stment(); --ncmp; } doif() { int flev,fsp,flab1,flab2; flev=locptr; fsp=sptr; flab1=getlabl(); test(flab1); stment(); sptr=modstk(fsp); locptr=flev; if(amatch("else",4)==0) { prlabel(flab1); col(); nl(); return; } jump(flab2=getlabl()); prlabel(flab1); col(); nl(); stment(); sptr=modstk(fsp); locptr=flev; prlabel(flab2); col(); nl(); } dodo() { int wq[wqsiz]; /* allocate local queue*/ wq[wqsym]=locptr; wq[wqsp]=sptr;wq[wqloop]=getlabl(); wq[wqlab]=getlabl(); wq[wqlab2]=getlabl(); addwhle(wq); prlabel(wq[wqlab2]);col();nl(); stment(); if(amatch("while",5)==0) {error("'while' expected."); return;} prlabel(wq[wqloop]);col();nl(); test(wq[wqlab]); jump(wq[wqlab2]); prlabel(wq[wqlab]);col();nl(); ns(); locptr=wq[wqsym]; sptr=modstk(wq[wqsp]); delwhle(); } dowhile() { int wq[4]; wq[wqsym]=locptr; wq[wqsp]=sptr; wq[wqloop]=getlabl(); wq[wqlab]=getlabl(); 
/* (dowhile continues from the preceding source line: the entry is queued,
   the loop label and test are emitted, the body is compiled, and the
   saved local pointer and stack level are put back.) */
    addwhle(wq);
    prlabel(wq[wqloop]); col(); nl();
    test(wq[wqlab]);
    stment();
    jump(wq[wqloop]);
    prlabel(wq[wqlab]); col(); nl();
    locptr=wq[wqsym];
    sptr=modstk(wq[wqsp]);
    delwhle();
}

/* doretrn - compile a return statement: evaluate the expression if one
   is present, then call modstk(0) and retfn() */
doretrn()
{
    if(endst()==0)expresn();
    modstk(0);
    retfn();
}

/* dobreak - compile a break statement.
   Uses the queue entry returned by rdwhile(); does nothing further when
   that is zero. Calls modstk with the saved stack level of the entry and
   jumps to its wqlab label. */
dobreak()
{
    int *ptr;
    if((ptr=rdwhile())==0)return;
    modstk((ptr[wqsp]));
    jump(ptr[wqlab]);
}

/* docont - compile a continue statement.
   Same as dobreak except that the jump goes to the wqloop label of the
   queue entry. */
docont()
{
    int *ptr;
    if((ptr=rdwhile())==0) return;
    modstk((ptr[wqsp]));
    jump(ptr[wqloop]);
}

/* doasm - copy assembler text to the output.
   cmode is cleared while the lines are copied. Lines are read with
   inline() and written unchanged until the end marker is matched or the
   input is exhausted. A start and end notice is written to the console,
   preceded by a tab (code 9) while input2 is open. */
doasm()
{
    cmode=0;
    if(input2)putchar(9);
    puts("#asm");crlf();
    while(1)
    {   inline();
        if(match("#endasm")) break;
        if(eof) break;
        outstr(line);
        nl();
    }
    kill();
    if(input2)putchar(9);
    puts("#endasm");crlf();
    cmode=1;
}

/* callfnc - compile the argument list of a call and the call itself.
   ptr: symbol table entry of the function, or zero, in which case the
        call is made with callstk() instead of callfn(ptr).
   nargs grows by 2 for every argument pushed. When ptr is zero fpush()
   is called once before the arguments and swapstk() before each argument
   push - NOTE(review): presumably this keeps the called address on top
   of the stack; confirm against the code generator.
   For the names accepted by special() one more value, the argument
   count nargs/2, is pushed after the real arguments.
   Finally sptr is set from modstk(sptr+nargs). */
callfnc(ptr) char *ptr;
{
    int nargs;
    nargs=0;
    blanks();
    if(ptr==0) fpush();
    while (streq(line+lptr,")")==0)
    {   if(endst())break;
        expresn();
        if(ptr==0)swapstk();
        fpush();
        nargs=nargs+2;
        if(match(",")==0)break;
    }
    ndbrack(")");
    /* give special treatment to printf,scanf etc */
    if(ptr)
    {if(special(ptr))
        {   immed(); outdec(nargs/2); nl();
            nargs=nargs+2;
            fpush();
        }
        callfn(ptr);
    }
    else callstk();
    sptr=modstk(sptr+nargs);
}

/* Check for special function names which are to be given
   a final extra argument equal to the number of args.
   Returns 1 for a listed name, 0 otherwise. */
special(ptr) char *ptr;
{
    if(astreq(ptr,"printf",6))return 1;
    if(astreq(ptr,"scanf",5))return 1;
    if(astreq(ptr,"sprintf",7))return 1;
    if(astreq(ptr,"sscanf",6))return 1;
    if(astreq(ptr,"fprintf",7))return 1;
    if(astreq(ptr,"fscanf",6))return 1;
    return 0;
}

/* junk - skip over unusable input after an error.
   One character is taken with inbyte(). If it is alphanumeric the rest
   of the alphanumeric run is skipped; otherwise characters are skipped
   until an alphanumeric one or a zero from ch() is met. Blanks are
   skipped afterwards. */
junk()
{
    if(an(inbyte()))
        while(an(ch()))gch();
    else while(an(ch())==0)
    {   if(ch()==0)break;
        gch();
    }
    blanks();
}

/* endst - non-zero when, after skipping blanks, the next text is a
   semicolon or ch() yields zero */
endst()
{
    blanks();
    return((streq(line+lptr,";")|(ch()==0)));
}

/* illname - report an illegal symbol name and skip it */
illname()
{
    error("Illegal symbol name");
    junk();
}

/* mltidef - report a multiple definition; the name sname is written to
   the output after a call to comment() */
mltidef(sname) char *sname;
{
    error("Already defined");
    comment();
    outstr(sname);
    nl();
}

/* ndbrack - require the text str at the current position; when it is
   missing report it and write str to the output after comment() */
ndbrack(str) char *str;
{
    if(match(str)==0)
    {   error("Missing bracket");
        comment();
        outstr(str);
        nl();
    }
}

/* ndlval - report that an lvalue was required */
ndlval()
{
    error("Must be lvalue");
}

/* findglb - look sname up in the global part of the symbol table,
   comparing with astreq over namemax characters. The function continues
   on the following source line. */
findglb(sname) char *sname;
{
    char *ptr;
    ptr=strtglb;
    while(ptr!=glbptr)
    {   if(astreq(sname,ptr,namemax))return
ptr; ptr=ptr+symsiz; } return 0; } findloc(sname) char *sname; { char *ptr; ptr=strtloc; while(ptr!=locptr) { if(astreq(sname,ptr,namemax)) return ptr; ptr=ptr+symsiz; } return 0; } addglb(sname,id,typ,value) char *sname,id,typ; int value; { char *ptr; if(cptr=findglb(sname))return cptr; if(glbptr>=endglb) { error("Global symbol table overflow"); return 0; } cptr=ptr=glbptr; while(an(*ptr++ = *sname++)); cptr[ident]=id; cptr[type]=typ; cptr[storage]=statik; cptr[offset]=value; cptr[offset+1]=value>>8; glbptr=glbptr+symsiz; return cptr; } addloc(sname,id,typ,value) char *sname,id,typ; int value; { char *ptr; if(cptr=findloc(sname)) return cptr; if(locptr>=endloc) { error("Local symbol table overflow"); return 0; } cptr=ptr=locptr; while(an(*ptr++ = *sname++)); cptr[ident]=id; cptr[type]=typ; cptr[storage]=stkloc; cptr[offset]=value; cptr[offset+1]=value>>8; locptr=locptr+symsiz; return cptr; } symname(sname) char *sname; { int k; char c; blanks(); if(alpha(ch())==0)return 0; k=0; while(an(ch()))sname[k++]=gch(); sname[k]=0; return 1; } getlabl() { return(++nxtlab); } prlabel(label) int label; { outstr("cc"); outdec(label); } alpha(c) char c; { c=c&127; return(((c>='a')&(c<='z'))| ((c>='A')&(c<='Z'))| (c=='_')|(c=='$')); } numeric(c) char c; { c=c&127; return((c>='0')&(c<='9')); } an(c) char c; { return((alpha(c))|(numeric(c))); } pl(str) char *str; { int k; k=0; crlf(); while(str[k])putchar(str[k++]); } addwhle(ptr) int ptr[]; { int k; if(wqptr==wqmax) { error("Too many active whiles"); return; } k=0; while(k0) { if((k==cr)|(lptr>=linemax))break; line[lptr++]=k; } line[lptr]=0; if(k<=0) { fclose(unit); if(input2){input2=0; endincl();} else input=0; } if(lptr) { if((ctext)&(cmode)) { comment(); outstr(line); nl(); } lptr=0; return; } } } keepch(c) char c; { mline[mptr]=c; if(mptr=mpmax)error("Line too long"); lptr=mptr=0; while(line[lptr++]=mline[mptr++]); lptr=0; } addmac() { char sname[namesiz]; int k; if(symname(sname)==0) { illname(); kill(); return; } k=0; 
/* (addmac continues from the preceding source line.)
   The first loop hands the characters of the name to putmac one by one,
   the second skips spaces and tabs (code 9), the third hands over the
   remainder of the line taken with gch().
   NOTE(review): both copy loops stop when putmac yields zero - presumably
   putmac returns the character it stored; its return is not visible. */
    while(putmac(sname[k++]));
    while(ch()==' '|ch()==9) gch();
    while(putmac(gch()));
    if(macptr>=macmax)error("Macro table full");
}

/* putmac - store c at macq[macptr].
   NOTE(review): the text is damaged as received. putmac breaks off inside
   its bounds test and runs straight into the tail of what looks like the
   error reporting routine, leaving an unbalanced double quote. Everything
   from here to the end of this source line is therefore kept exactly as
   found, on one line; restore from a clean copy before building. */
putmac(c) char c; { macq[macptr]=c; if(macptr ");puts(ptr);crlf(); puts(line); crlf(); comment(); k=0; while(k=ERRMAX) /*Abort if too many errors*/ {crlf();crlf();puts("Too many errors.Compilation aborted"); crlf(); exit(); } if(errstop){if(keyscan()=='.')errstop=0;} } ol(ptr) char ptr[]; { ot(ptr); nl(); } ot(ptr) char ptr[]; { tab(); outstr(ptr); } /* Scan keyboard without echo */ #asm keyscan: CALL CHARIN JP ccsxt #endasm /* Routine needed for O/P of error count etc. to console. JMHH */ putdec(n) int n; { int i; if(n<0) { putchar('-'); n=-n; } /* Unlikely to be needed! */ if((i=n/10)!=0) putdec(i); putchar(n%10+'0'); } puts(str) char *str; { int k; k=0; while(str[k])putchar(str[k++]); } /* */ /* need semicolon */ /* written by Mike Bernson 1/81 */ /* */ needsem() { if (match(";")) return FALSE; error("Missing semicolon"); junk(); return TRUE; } /* */ /* need opening parn */ /* written by Mike Bernson 1/81 */ needopn() { if (match("(")) return FALSE; error("Missing left bracket"); junk(); return TRUE; } /* */ /* need closing parn */ /* written by Mike Bernson 1/81 */ needcpn() { if (match(")")) return FALSE; error("Missing right bracket"); junk(); return TRUE; } /* */ /* need opening brace */ /* written by Mike Bernson 1/81 */ needobr() { if (match("{")) return FALSE; error("Missing left brace"); junk(); return TRUE; } /* */ /* need closing brace */ /* written by Mike Bernson 1/81 */ needcbr() { if (match("}")) return FALSE; error("Missing right brace"); junk(); return TRUE; } /* */ /* "for" statement */ /* */ /* written by Mike Bernson 1/81 */ /* */ dofor() { int queue[wqsiz]; /* local queue area */ queue[wqsym]=locptr; /* save locaL LEVEL */ queue[wqsp]=sptr; /* and stack pointer */ queue[wqloop]=getlabl(); /* looping label */ queue[wqlab]=getlabl(); /* loop exit varble */ queue[wqend]=getlabl(); /* loop end label */ 
queue[wqbody]=getlabl(); addwhle(queue); /* add while to loop queue */ if (needopn()) { /* check for open parn */ delwhle(); /* delete for entry from queue */ return; } expresn(); /* init express */ if (needsem()) { /* check for semcol */ delwhle(); /* delete queue entry */ return; } splabel(queue[wqloop]); /* control loop label */ expresn(); /* loop control express */ testjmp(queue[wqlab]); /* see if exit time */ jump(queue[wqbody]); /* not time to exit do body */ if (needsem()) { /* check for semcol */ delwhle(); /* delete 1 queue entry */ return; } splabel(queue[wqend]); /* print end of loop */ expresn(); /* end loop expression */ jump(queue[wqloop]); /* do loop control expression */ if (needcpn()) { delwhle(); return; } splabel(queue[wqbody]); stment(); jump(queue[wqend]); splabel(queue[wqlab]); locptr=queue[wqsym]; sptr=modstk(queue[wqsp]); delwhle(); } splabel(label) int label; { prlabel(label); col();nl(); } /* "Switch" statement */ /* */ /* written by Mike Bernson 1/81 */ /* */ doswtch() { int value[SW_MAX]; /* value for case statement */ int label[SW_MAX]; /* value for each label */ int count,tenp; /* number of switches */ int end_lab; /* label for default */ int lab_sw; /* used for switch label */ int temp,val[2]; int queue[wqsiz]; /* local queue area */ count=0; /* number of case statements */ queue[wqsym]=locptr; /* local variable pointer */ queue[wqsp]=sptr; /* save current stack pointer */ end_lab= /* default exit label */ queue[wqloop]= /* looping label */ queue[wqlab]= /* loop exit variable */ queue[wqend]=getlabl(); /* continue label */ addwhle(queue); /* add to while stack */ if (needopn()) { /* check to see if"(" exits */ delwhle(); /* no delete switch entry and */ return; /* return out of switch */ } expresn(); /* expression for switch */ fpush(); if (needcpn()) { /* check for ")" */ delwhle(); /* not found delete queue entry */ return; /* and exit switch statement */ } if (needobr()) { delwhle(); return; } jump(lab_sw=getlabl()); sptr=sptr+2; 
while(1) { if (amatch("case",4)) { if(constex(val)==0){ error("Bad constant"); continue; } if (count