#include <u.h>
#include <libc.h>
#include <stdio.h>
#include "cpp.h"

/*
 * lexical FSM encoding
 *	when in state state, and one of the characters
 *	in ch arrives, enter nextstate.
 *	States >= S_SELF are either final, or at least require special action.
 *	In 'fsm' there is a line for each state X charset X nextstate.
 *	List chars that overwrite previous entries later (e.g. C_ALPH
 *	can be overridden by '_' by a later entry); C_XX is
 *	the universal set, and should always be first.
 *	States >= S_SELF are represented in the big table as negative values
 *	(their one's complement).
 *	S_SELF and S_SELFB encode the resulting token type in the upper bits.
 *	These actions differ in that S_SELF doesn't have a lookahead char,
 *	S_SELFB does.
 *
 * The encoding is blown out into a big table for time-efficiency.
 * Entries have
 *      nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
23*3e12c5d1SDavid du Colombier */ 24*3e12c5d1SDavid du Colombier 25*3e12c5d1SDavid du Colombier #define MAXSTATE 32 26*3e12c5d1SDavid du Colombier #define ACT(tok,act) ((tok<<7)+act) 27*3e12c5d1SDavid du Colombier #define QBSBIT 0100 28*3e12c5d1SDavid du Colombier #define GETACT(st) (st>>7)&0x1ff 29*3e12c5d1SDavid du Colombier 30*3e12c5d1SDavid du Colombier #define UTF2(c) ((c)>=0xA0 && (c)<0xE0) /* 2-char UTF seq */ 31*3e12c5d1SDavid du Colombier #define UTF3(c) ((c)>=0xE0 && (c)<0xF0) /* 3-char UTF seq */ 32*3e12c5d1SDavid du Colombier 33*3e12c5d1SDavid du Colombier /* character classes */ 34*3e12c5d1SDavid du Colombier #define C_WS 1 35*3e12c5d1SDavid du Colombier #define C_ALPH 2 36*3e12c5d1SDavid du Colombier #define C_NUM 3 37*3e12c5d1SDavid du Colombier #define C_EOF 4 38*3e12c5d1SDavid du Colombier #define C_XX 5 39*3e12c5d1SDavid du Colombier 40*3e12c5d1SDavid du Colombier enum state { 41*3e12c5d1SDavid du Colombier START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4, 42*3e12c5d1SDavid du Colombier CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, 43*3e12c5d1SDavid du Colombier CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, 44*3e12c5d1SDavid du Colombier S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, 45*3e12c5d1SDavid du Colombier S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME 46*3e12c5d1SDavid du Colombier }; 47*3e12c5d1SDavid du Colombier 48*3e12c5d1SDavid du Colombier int tottok; 49*3e12c5d1SDavid du Colombier int tokkind[256]; 50*3e12c5d1SDavid du Colombier struct fsm { 51*3e12c5d1SDavid du Colombier int state; /* if in this state */ 52*3e12c5d1SDavid du Colombier uchar ch[4]; /* and see one of these characters */ 53*3e12c5d1SDavid du Colombier int nextstate; /* enter this state if +ve */ 54*3e12c5d1SDavid du Colombier }; 55*3e12c5d1SDavid du Colombier 56*3e12c5d1SDavid du Colombier /*const*/ struct fsm fsm[] = { 57*3e12c5d1SDavid du Colombier /* start state */ 58*3e12c5d1SDavid du Colombier START, { 
C_XX }, ACT(UNCLASS,S_SELF), 59*3e12c5d1SDavid du Colombier START, { ' ', '\t', '\v' }, WS1, 60*3e12c5d1SDavid du Colombier START, { C_NUM }, NUM1, 61*3e12c5d1SDavid du Colombier START, { '.' }, NUM3, 62*3e12c5d1SDavid du Colombier START, { C_ALPH }, ID1, 63*3e12c5d1SDavid du Colombier START, { 'L' }, ST1, 64*3e12c5d1SDavid du Colombier START, { '"' }, ST2, 65*3e12c5d1SDavid du Colombier START, { '\'' }, CC1, 66*3e12c5d1SDavid du Colombier START, { '/' }, COM1, 67*3e12c5d1SDavid du Colombier START, { EOF }, S_EOF, 68*3e12c5d1SDavid du Colombier START, { '\n' }, S_NL, 69*3e12c5d1SDavid du Colombier START, { '-' }, MINUS1, 70*3e12c5d1SDavid du Colombier START, { '+' }, PLUS1, 71*3e12c5d1SDavid du Colombier START, { '<' }, LT1, 72*3e12c5d1SDavid du Colombier START, { '>' }, GT1, 73*3e12c5d1SDavid du Colombier START, { '=' }, ASG1, 74*3e12c5d1SDavid du Colombier START, { '!' }, NOT1, 75*3e12c5d1SDavid du Colombier START, { '&' }, AND1, 76*3e12c5d1SDavid du Colombier START, { '|' }, OR1, 77*3e12c5d1SDavid du Colombier START, { '#' }, SHARP1, 78*3e12c5d1SDavid du Colombier START, { '%' }, PCT1, 79*3e12c5d1SDavid du Colombier START, { '[' }, ACT(SBRA,S_SELF), 80*3e12c5d1SDavid du Colombier START, { ']' }, ACT(SKET,S_SELF), 81*3e12c5d1SDavid du Colombier START, { '(' }, ACT(LP,S_SELF), 82*3e12c5d1SDavid du Colombier START, { ')' }, ACT(RP,S_SELF), 83*3e12c5d1SDavid du Colombier START, { '*' }, STAR1, 84*3e12c5d1SDavid du Colombier START, { ',' }, ACT(COMMA,S_SELF), 85*3e12c5d1SDavid du Colombier START, { '?' 
}, ACT(QUEST,S_SELF), 86*3e12c5d1SDavid du Colombier START, { ':' }, ACT(COLON,S_SELF), 87*3e12c5d1SDavid du Colombier START, { ';' }, ACT(SEMIC,S_SELF), 88*3e12c5d1SDavid du Colombier START, { '{' }, ACT(CBRA,S_SELF), 89*3e12c5d1SDavid du Colombier START, { '}' }, ACT(CKET,S_SELF), 90*3e12c5d1SDavid du Colombier START, { '~' }, ACT(TILDE,S_SELF), 91*3e12c5d1SDavid du Colombier START, { '^' }, CIRC1, 92*3e12c5d1SDavid du Colombier 93*3e12c5d1SDavid du Colombier /* saw a digit */ 94*3e12c5d1SDavid du Colombier NUM1, { C_XX }, ACT(NUMBER,S_SELFB), 95*3e12c5d1SDavid du Colombier NUM1, { C_NUM, C_ALPH, '.' }, NUM1, 96*3e12c5d1SDavid du Colombier NUM1, { 'E', 'e' }, NUM2, 97*3e12c5d1SDavid du Colombier NUM1, { '_' }, ACT(NUMBER,S_SELFB), 98*3e12c5d1SDavid du Colombier 99*3e12c5d1SDavid du Colombier /* saw possible start of exponent, digits-e */ 100*3e12c5d1SDavid du Colombier NUM2, { C_XX }, ACT(NUMBER,S_SELFB), 101*3e12c5d1SDavid du Colombier NUM2, { '+', '-' }, NUM1, 102*3e12c5d1SDavid du Colombier NUM2, { C_NUM, C_ALPH }, NUM1, 103*3e12c5d1SDavid du Colombier NUM2, { '_' }, ACT(NUMBER,S_SELFB), 104*3e12c5d1SDavid du Colombier 105*3e12c5d1SDavid du Colombier /* saw a '.', which could be a number or an operator */ 106*3e12c5d1SDavid du Colombier NUM3, { C_XX }, ACT(DOT,S_SELFB), 107*3e12c5d1SDavid du Colombier NUM3, { '.' }, DOTS1, 108*3e12c5d1SDavid du Colombier NUM3, { C_NUM }, NUM1, 109*3e12c5d1SDavid du Colombier 110*3e12c5d1SDavid du Colombier DOTS1, { C_XX }, ACT(UNCLASS, S_SELFB), 111*3e12c5d1SDavid du Colombier DOTS1, { C_NUM }, NUM1, 112*3e12c5d1SDavid du Colombier DOTS1, { '.' }, ACT(ELLIPS, S_SELF), 113*3e12c5d1SDavid du Colombier 114*3e12c5d1SDavid du Colombier /* saw a letter or _ */ 115*3e12c5d1SDavid du Colombier ID1, { C_XX }, ACT(NAME,S_NAME), 116*3e12c5d1SDavid du Colombier ID1, { C_ALPH, C_NUM }, ID1, 117*3e12c5d1SDavid du Colombier 118*3e12c5d1SDavid du Colombier /* saw L (start of wide string?) 
*/ 119*3e12c5d1SDavid du Colombier ST1, { C_XX }, ACT(NAME,S_NAME), 120*3e12c5d1SDavid du Colombier ST1, { C_ALPH, C_NUM }, ID1, 121*3e12c5d1SDavid du Colombier ST1, { '"' }, ST2, 122*3e12c5d1SDavid du Colombier ST1, { '\'' }, CC1, 123*3e12c5d1SDavid du Colombier 124*3e12c5d1SDavid du Colombier /* saw " beginning string */ 125*3e12c5d1SDavid du Colombier ST2, { C_XX }, ST2, 126*3e12c5d1SDavid du Colombier ST2, { '"' }, ACT(STRING, S_SELF), 127*3e12c5d1SDavid du Colombier ST2, { '\\' }, ST3, 128*3e12c5d1SDavid du Colombier ST2, { '\n' }, S_STNL, 129*3e12c5d1SDavid du Colombier ST1, { EOF }, S_EOFSTR, 130*3e12c5d1SDavid du Colombier 131*3e12c5d1SDavid du Colombier /* saw \ in string */ 132*3e12c5d1SDavid du Colombier ST3, { C_XX }, ST2, 133*3e12c5d1SDavid du Colombier ST3, { '\n' }, S_STNL, 134*3e12c5d1SDavid du Colombier ST3, { EOF }, S_EOFSTR, 135*3e12c5d1SDavid du Colombier 136*3e12c5d1SDavid du Colombier /* saw ' beginning character const */ 137*3e12c5d1SDavid du Colombier CC1, { C_XX }, CC1, 138*3e12c5d1SDavid du Colombier CC1, { '\'' }, ACT(CCON, S_SELF), 139*3e12c5d1SDavid du Colombier CC1, { '\\' }, CC2, 140*3e12c5d1SDavid du Colombier CC1, { '\n' }, S_STNL, 141*3e12c5d1SDavid du Colombier CC1, { EOF }, S_EOFSTR, 142*3e12c5d1SDavid du Colombier 143*3e12c5d1SDavid du Colombier /* saw \ in ccon */ 144*3e12c5d1SDavid du Colombier CC2, { C_XX }, CC1, 145*3e12c5d1SDavid du Colombier CC2, { '\n' }, S_STNL, 146*3e12c5d1SDavid du Colombier CC2, { EOF }, S_EOFSTR, 147*3e12c5d1SDavid du Colombier 148*3e12c5d1SDavid du Colombier /* saw /, perhaps start of comment */ 149*3e12c5d1SDavid du Colombier COM1, { C_XX }, ACT(SLASH, S_SELFB), 150*3e12c5d1SDavid du Colombier COM1, { '=' }, ACT(ASSLASH, S_SELF), 151*3e12c5d1SDavid du Colombier COM1, { '*' }, COM2, 152*3e12c5d1SDavid du Colombier COM1, { '/' }, COM4, 153*3e12c5d1SDavid du Colombier 154*3e12c5d1SDavid du Colombier /* saw "/*", start of comment */ 155*3e12c5d1SDavid du Colombier COM2, { C_XX }, COM2, 
156*3e12c5d1SDavid du Colombier COM2, { '\n' }, S_COMNL, 157*3e12c5d1SDavid du Colombier COM2, { '*' }, COM3, 158*3e12c5d1SDavid du Colombier COM2, { EOF }, S_EOFCOM, 159*3e12c5d1SDavid du Colombier 160*3e12c5d1SDavid du Colombier /* saw the * possibly ending a comment */ 161*3e12c5d1SDavid du Colombier COM3, { C_XX }, COM2, 162*3e12c5d1SDavid du Colombier COM3, { '\n' }, S_COMNL, 163*3e12c5d1SDavid du Colombier COM3, { '*' }, COM3, 164*3e12c5d1SDavid du Colombier COM3, { '/' }, S_COMMENT, 165*3e12c5d1SDavid du Colombier 166*3e12c5d1SDavid du Colombier /* // comment */ 167*3e12c5d1SDavid du Colombier COM4, { C_XX }, COM4, 168*3e12c5d1SDavid du Colombier COM4, { '\n' }, S_NL, 169*3e12c5d1SDavid du Colombier COM4, { EOF }, S_EOFCOM, 170*3e12c5d1SDavid du Colombier 171*3e12c5d1SDavid du Colombier /* saw white space, eat it up */ 172*3e12c5d1SDavid du Colombier WS1, { C_XX }, S_WS, 173*3e12c5d1SDavid du Colombier WS1, { ' ', '\t', '\v' }, WS1, 174*3e12c5d1SDavid du Colombier 175*3e12c5d1SDavid du Colombier /* saw -, check --, -=, -> */ 176*3e12c5d1SDavid du Colombier MINUS1, { C_XX }, ACT(MINUS, S_SELFB), 177*3e12c5d1SDavid du Colombier MINUS1, { '-' }, ACT(MMINUS, S_SELF), 178*3e12c5d1SDavid du Colombier MINUS1, { '=' }, ACT(ASMINUS,S_SELF), 179*3e12c5d1SDavid du Colombier MINUS1, { '>' }, ACT(ARROW,S_SELF), 180*3e12c5d1SDavid du Colombier 181*3e12c5d1SDavid du Colombier /* saw +, check ++, += */ 182*3e12c5d1SDavid du Colombier PLUS1, { C_XX }, ACT(PLUS, S_SELFB), 183*3e12c5d1SDavid du Colombier PLUS1, { '+' }, ACT(PPLUS, S_SELF), 184*3e12c5d1SDavid du Colombier PLUS1, { '=' }, ACT(ASPLUS, S_SELF), 185*3e12c5d1SDavid du Colombier 186*3e12c5d1SDavid du Colombier /* saw <, check <<, <<=, <= */ 187*3e12c5d1SDavid du Colombier LT1, { C_XX }, ACT(LT, S_SELFB), 188*3e12c5d1SDavid du Colombier LT1, { '<' }, LT2, 189*3e12c5d1SDavid du Colombier LT1, { '=' }, ACT(LEQ, S_SELF), 190*3e12c5d1SDavid du Colombier LT2, { C_XX }, ACT(LSH, S_SELFB), 191*3e12c5d1SDavid du Colombier 
LT2, { '=' }, ACT(ASLSH, S_SELF), 192*3e12c5d1SDavid du Colombier 193*3e12c5d1SDavid du Colombier /* saw >, check >>, >>=, >= */ 194*3e12c5d1SDavid du Colombier GT1, { C_XX }, ACT(GT, S_SELFB), 195*3e12c5d1SDavid du Colombier GT1, { '>' }, GT2, 196*3e12c5d1SDavid du Colombier GT1, { '=' }, ACT(GEQ, S_SELF), 197*3e12c5d1SDavid du Colombier GT2, { C_XX }, ACT(RSH, S_SELFB), 198*3e12c5d1SDavid du Colombier GT2, { '=' }, ACT(ASRSH, S_SELF), 199*3e12c5d1SDavid du Colombier 200*3e12c5d1SDavid du Colombier /* = */ 201*3e12c5d1SDavid du Colombier ASG1, { C_XX }, ACT(ASGN, S_SELFB), 202*3e12c5d1SDavid du Colombier ASG1, { '=' }, ACT(EQ, S_SELF), 203*3e12c5d1SDavid du Colombier 204*3e12c5d1SDavid du Colombier /* ! */ 205*3e12c5d1SDavid du Colombier NOT1, { C_XX }, ACT(NOT, S_SELFB), 206*3e12c5d1SDavid du Colombier NOT1, { '=' }, ACT(NEQ, S_SELF), 207*3e12c5d1SDavid du Colombier 208*3e12c5d1SDavid du Colombier /* & */ 209*3e12c5d1SDavid du Colombier AND1, { C_XX }, ACT(AND, S_SELFB), 210*3e12c5d1SDavid du Colombier AND1, { '&' }, ACT(LAND, S_SELF), 211*3e12c5d1SDavid du Colombier AND1, { '=' }, ACT(ASAND, S_SELF), 212*3e12c5d1SDavid du Colombier 213*3e12c5d1SDavid du Colombier /* | */ 214*3e12c5d1SDavid du Colombier OR1, { C_XX }, ACT(OR, S_SELFB), 215*3e12c5d1SDavid du Colombier OR1, { '|' }, ACT(LOR, S_SELF), 216*3e12c5d1SDavid du Colombier OR1, { '=' }, ACT(ASOR, S_SELF), 217*3e12c5d1SDavid du Colombier 218*3e12c5d1SDavid du Colombier /* # */ 219*3e12c5d1SDavid du Colombier SHARP1, { C_XX }, ACT(SHARP, S_SELFB), 220*3e12c5d1SDavid du Colombier SHARP1, { '#' }, ACT(DSHARP, S_SELF), 221*3e12c5d1SDavid du Colombier 222*3e12c5d1SDavid du Colombier /* % */ 223*3e12c5d1SDavid du Colombier PCT1, { C_XX }, ACT(PCT, S_SELFB), 224*3e12c5d1SDavid du Colombier PCT1, { '=' }, ACT(ASPCT, S_SELF), 225*3e12c5d1SDavid du Colombier 226*3e12c5d1SDavid du Colombier /* * */ 227*3e12c5d1SDavid du Colombier STAR1, { C_XX }, ACT(STAR, S_SELFB), 228*3e12c5d1SDavid du Colombier STAR1, { '=' }, 
ACT(ASSTAR, S_SELF), 229*3e12c5d1SDavid du Colombier 230*3e12c5d1SDavid du Colombier /* ^ */ 231*3e12c5d1SDavid du Colombier CIRC1, { C_XX }, ACT(CIRC, S_SELFB), 232*3e12c5d1SDavid du Colombier CIRC1, { '=' }, ACT(ASCIRC, S_SELF), 233*3e12c5d1SDavid du Colombier 234*3e12c5d1SDavid du Colombier -1 235*3e12c5d1SDavid du Colombier }; 236*3e12c5d1SDavid du Colombier 237*3e12c5d1SDavid du Colombier /* first index is char+1 (to include EOF), second is state */ 238*3e12c5d1SDavid du Colombier /* increase #states to power of 2 to encourage use of shift */ 239*3e12c5d1SDavid du Colombier short bigfsm[257][MAXSTATE]; 240*3e12c5d1SDavid du Colombier 241*3e12c5d1SDavid du Colombier void 242*3e12c5d1SDavid du Colombier expandlex(void) 243*3e12c5d1SDavid du Colombier { 244*3e12c5d1SDavid du Colombier /*const*/ struct fsm *fp; 245*3e12c5d1SDavid du Colombier int i, j, nstate; 246*3e12c5d1SDavid du Colombier 247*3e12c5d1SDavid du Colombier for (fp = fsm; fp->state>=0; fp++) { 248*3e12c5d1SDavid du Colombier for (i=0; fp->ch[i]; i++) { 249*3e12c5d1SDavid du Colombier nstate = fp->nextstate; 250*3e12c5d1SDavid du Colombier if (nstate >= S_SELF) 251*3e12c5d1SDavid du Colombier nstate = ~nstate; 252*3e12c5d1SDavid du Colombier switch (fp->ch[i]) { 253*3e12c5d1SDavid du Colombier 254*3e12c5d1SDavid du Colombier case C_XX: /* random characters */ 255*3e12c5d1SDavid du Colombier for (j=0; j<257; j++) 256*3e12c5d1SDavid du Colombier bigfsm[j][fp->state] = nstate; 257*3e12c5d1SDavid du Colombier continue; 258*3e12c5d1SDavid du Colombier case C_ALPH: 259*3e12c5d1SDavid du Colombier for (j=0; j<=255; j++) 260*3e12c5d1SDavid du Colombier if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z' 261*3e12c5d1SDavid du Colombier || UTF2(j) || UTF3(j) || j=='_') 262*3e12c5d1SDavid du Colombier bigfsm[j+1][fp->state] = nstate; 263*3e12c5d1SDavid du Colombier continue; 264*3e12c5d1SDavid du Colombier case C_NUM: 265*3e12c5d1SDavid du Colombier for (j='0'; j<='9'; j++) 266*3e12c5d1SDavid du Colombier 
bigfsm[j+1][fp->state] = nstate; 267*3e12c5d1SDavid du Colombier continue; 268*3e12c5d1SDavid du Colombier default: 269*3e12c5d1SDavid du Colombier bigfsm[fp->ch[i]+1][fp->state] = nstate; 270*3e12c5d1SDavid du Colombier } 271*3e12c5d1SDavid du Colombier } 272*3e12c5d1SDavid du Colombier } 273*3e12c5d1SDavid du Colombier /* install special cases for ? (trigraphs), \ (splicing), runes, and EOB */ 274*3e12c5d1SDavid du Colombier for (i=0; i<MAXSTATE; i++) { 275*3e12c5d1SDavid du Colombier for (j=0; j<0xFF; j++) 276*3e12c5d1SDavid du Colombier if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) { 277*3e12c5d1SDavid du Colombier if (bigfsm[j+1][i]>0) 278*3e12c5d1SDavid du Colombier bigfsm[j+1][i] = ~bigfsm[j+1][i]; 279*3e12c5d1SDavid du Colombier bigfsm[j+1][i] &= ~QBSBIT; 280*3e12c5d1SDavid du Colombier } 281*3e12c5d1SDavid du Colombier bigfsm[EOB+1][i] = ~S_EOB; 282*3e12c5d1SDavid du Colombier } 283*3e12c5d1SDavid du Colombier } 284*3e12c5d1SDavid du Colombier 285*3e12c5d1SDavid du Colombier void 286*3e12c5d1SDavid du Colombier fixlex(void) 287*3e12c5d1SDavid du Colombier { 288*3e12c5d1SDavid du Colombier /* do C++ comments? */ 289*3e12c5d1SDavid du Colombier if (Cplusplus==0) 290*3e12c5d1SDavid du Colombier bigfsm['/'+1][COM1] = bigfsm['x'+1][COM1]; 291*3e12c5d1SDavid du Colombier } 292*3e12c5d1SDavid du Colombier 293*3e12c5d1SDavid du Colombier /* 294*3e12c5d1SDavid du Colombier * fill in a row of tokens from input, terminated by NL or END 295*3e12c5d1SDavid du Colombier * First token is put at trp->lp. 296*3e12c5d1SDavid du Colombier * Reset is non-zero when the input buffer can be "rewound." 297*3e12c5d1SDavid du Colombier * The value is a flag indicating that possible macros have 298*3e12c5d1SDavid du Colombier * been seen in the row. 
299*3e12c5d1SDavid du Colombier */ 300*3e12c5d1SDavid du Colombier int 301*3e12c5d1SDavid du Colombier gettokens(Tokenrow *trp, int reset) 302*3e12c5d1SDavid du Colombier { 303*3e12c5d1SDavid du Colombier register int c, state, oldstate; 304*3e12c5d1SDavid du Colombier register uchar *ip; 305*3e12c5d1SDavid du Colombier register Token *tp, *maxp; 306*3e12c5d1SDavid du Colombier int runelen; 307*3e12c5d1SDavid du Colombier Source *s = cursource; 308*3e12c5d1SDavid du Colombier int nmac = 0; 309*3e12c5d1SDavid du Colombier extern char outbuf[]; 310*3e12c5d1SDavid du Colombier 311*3e12c5d1SDavid du Colombier tp = trp->lp; 312*3e12c5d1SDavid du Colombier ip = s->inp; 313*3e12c5d1SDavid du Colombier if (reset) { 314*3e12c5d1SDavid du Colombier s->lineinc = 0; 315*3e12c5d1SDavid du Colombier if (ip>=s->inl) { /* nothing in buffer */ 316*3e12c5d1SDavid du Colombier s->inl = s->inb; 317*3e12c5d1SDavid du Colombier fillbuf(s); 318*3e12c5d1SDavid du Colombier ip = s->inp = s->inb; 319*3e12c5d1SDavid du Colombier } else if (ip >= s->inb+(3*INS/4)) { 320*3e12c5d1SDavid du Colombier memmove(s->inb, ip, 4+s->inl-ip); 321*3e12c5d1SDavid du Colombier s->inl = s->inb+(s->inl-ip); 322*3e12c5d1SDavid du Colombier ip = s->inp = s->inb; 323*3e12c5d1SDavid du Colombier } 324*3e12c5d1SDavid du Colombier } 325*3e12c5d1SDavid du Colombier maxp = &trp->bp[trp->max]; 326*3e12c5d1SDavid du Colombier runelen = 1; 327*3e12c5d1SDavid du Colombier for (;;) { 328*3e12c5d1SDavid du Colombier continue2: 329*3e12c5d1SDavid du Colombier if (tp>=maxp) { 330*3e12c5d1SDavid du Colombier trp->lp = tp; 331*3e12c5d1SDavid du Colombier tp = growtokenrow(trp); 332*3e12c5d1SDavid du Colombier maxp = &trp->bp[trp->max]; 333*3e12c5d1SDavid du Colombier } 334*3e12c5d1SDavid du Colombier tp->type = UNCLASS; 335*3e12c5d1SDavid du Colombier tp->hideset = 0; 336*3e12c5d1SDavid du Colombier tp->t = ip; 337*3e12c5d1SDavid du Colombier tp->wslen = 0; 338*3e12c5d1SDavid du Colombier tp->flag = 0; 339*3e12c5d1SDavid du 
Colombier state = START; 340*3e12c5d1SDavid du Colombier for (;;) { 341*3e12c5d1SDavid du Colombier oldstate = state; 342*3e12c5d1SDavid du Colombier c = *ip; 343*3e12c5d1SDavid du Colombier if ((state = bigfsm[c+1][state]) >= 0) { 344*3e12c5d1SDavid du Colombier ip += runelen; 345*3e12c5d1SDavid du Colombier runelen = 1; 346*3e12c5d1SDavid du Colombier continue; 347*3e12c5d1SDavid du Colombier } 348*3e12c5d1SDavid du Colombier state = ~state; 349*3e12c5d1SDavid du Colombier reswitch: 350*3e12c5d1SDavid du Colombier switch (state&0177) { 351*3e12c5d1SDavid du Colombier case S_SELF: 352*3e12c5d1SDavid du Colombier ip += runelen; 353*3e12c5d1SDavid du Colombier runelen = 1; 354*3e12c5d1SDavid du Colombier case S_SELFB: 355*3e12c5d1SDavid du Colombier tp->type = GETACT(state); 356*3e12c5d1SDavid du Colombier tp->len = ip - tp->t; 357*3e12c5d1SDavid du Colombier tp++; 358*3e12c5d1SDavid du Colombier goto continue2; 359*3e12c5d1SDavid du Colombier 360*3e12c5d1SDavid du Colombier case S_NAME: /* like S_SELFB but with nmac check */ 361*3e12c5d1SDavid du Colombier tp->type = NAME; 362*3e12c5d1SDavid du Colombier tp->len = ip - tp->t; 363*3e12c5d1SDavid du Colombier nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0); 364*3e12c5d1SDavid du Colombier tp++; 365*3e12c5d1SDavid du Colombier goto continue2; 366*3e12c5d1SDavid du Colombier 367*3e12c5d1SDavid du Colombier case S_WS: 368*3e12c5d1SDavid du Colombier tp->wslen = ip - tp->t; 369*3e12c5d1SDavid du Colombier tp->t = ip; 370*3e12c5d1SDavid du Colombier state = START; 371*3e12c5d1SDavid du Colombier continue; 372*3e12c5d1SDavid du Colombier 373*3e12c5d1SDavid du Colombier default: 374*3e12c5d1SDavid du Colombier if ((state&QBSBIT)==0) { 375*3e12c5d1SDavid du Colombier ip += runelen; 376*3e12c5d1SDavid du Colombier runelen = 1; 377*3e12c5d1SDavid du Colombier continue; 378*3e12c5d1SDavid du Colombier } 379*3e12c5d1SDavid du Colombier state &= ~QBSBIT; 380*3e12c5d1SDavid du Colombier s->inp = ip; 381*3e12c5d1SDavid du 
Colombier if (c=='?') { /* check trigraph */ 382*3e12c5d1SDavid du Colombier if (trigraph(s)) { 383*3e12c5d1SDavid du Colombier state = oldstate; 384*3e12c5d1SDavid du Colombier continue; 385*3e12c5d1SDavid du Colombier } 386*3e12c5d1SDavid du Colombier goto reswitch; 387*3e12c5d1SDavid du Colombier } 388*3e12c5d1SDavid du Colombier if (c=='\\') { /* line-folding */ 389*3e12c5d1SDavid du Colombier if (foldline(s)) { 390*3e12c5d1SDavid du Colombier s->lineinc++; 391*3e12c5d1SDavid du Colombier state = oldstate; 392*3e12c5d1SDavid du Colombier continue; 393*3e12c5d1SDavid du Colombier } 394*3e12c5d1SDavid du Colombier goto reswitch; 395*3e12c5d1SDavid du Colombier } 396*3e12c5d1SDavid du Colombier if (UTF2(c)) { 397*3e12c5d1SDavid du Colombier runelen = 2; 398*3e12c5d1SDavid du Colombier goto reswitch; 399*3e12c5d1SDavid du Colombier } 400*3e12c5d1SDavid du Colombier if (UTF3(c)) { 401*3e12c5d1SDavid du Colombier runelen = 3; 402*3e12c5d1SDavid du Colombier goto reswitch; 403*3e12c5d1SDavid du Colombier } 404*3e12c5d1SDavid du Colombier error(WARNING, "Lexical botch in cpp"); 405*3e12c5d1SDavid du Colombier ip += runelen; 406*3e12c5d1SDavid du Colombier runelen = 1; 407*3e12c5d1SDavid du Colombier continue; 408*3e12c5d1SDavid du Colombier 409*3e12c5d1SDavid du Colombier case S_EOB: 410*3e12c5d1SDavid du Colombier s->inp = ip; 411*3e12c5d1SDavid du Colombier fillbuf(cursource); 412*3e12c5d1SDavid du Colombier state = oldstate; 413*3e12c5d1SDavid du Colombier continue; 414*3e12c5d1SDavid du Colombier 415*3e12c5d1SDavid du Colombier case S_EOF: 416*3e12c5d1SDavid du Colombier tp->type = END; 417*3e12c5d1SDavid du Colombier tp->len = 0; 418*3e12c5d1SDavid du Colombier s->inp = ip; 419*3e12c5d1SDavid du Colombier if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=-1) 420*3e12c5d1SDavid du Colombier error(WARNING,"No newline at end of file"); 421*3e12c5d1SDavid du Colombier trp->lp = tp+1; 422*3e12c5d1SDavid du Colombier return nmac; 423*3e12c5d1SDavid du Colombier 
424*3e12c5d1SDavid du Colombier case S_STNL: 425*3e12c5d1SDavid du Colombier error(ERROR, "Unterminated string or char const"); 426*3e12c5d1SDavid du Colombier case S_NL: 427*3e12c5d1SDavid du Colombier tp->t = ip; 428*3e12c5d1SDavid du Colombier tp->type = NL; 429*3e12c5d1SDavid du Colombier tp->len = 1; 430*3e12c5d1SDavid du Colombier tp->wslen = 0; 431*3e12c5d1SDavid du Colombier s->lineinc++; 432*3e12c5d1SDavid du Colombier s->inp = ip+1; 433*3e12c5d1SDavid du Colombier trp->lp = tp+1; 434*3e12c5d1SDavid du Colombier return nmac; 435*3e12c5d1SDavid du Colombier 436*3e12c5d1SDavid du Colombier case S_EOFSTR: 437*3e12c5d1SDavid du Colombier error(FATAL, "EOF in string or char constant"); 438*3e12c5d1SDavid du Colombier break; 439*3e12c5d1SDavid du Colombier 440*3e12c5d1SDavid du Colombier case S_COMNL: 441*3e12c5d1SDavid du Colombier s->lineinc++; 442*3e12c5d1SDavid du Colombier state = COM2; 443*3e12c5d1SDavid du Colombier ip += runelen; 444*3e12c5d1SDavid du Colombier runelen = 1; 445*3e12c5d1SDavid du Colombier continue; 446*3e12c5d1SDavid du Colombier 447*3e12c5d1SDavid du Colombier case S_EOFCOM: 448*3e12c5d1SDavid du Colombier error(WARNING, "EOF inside comment"); 449*3e12c5d1SDavid du Colombier --ip; 450*3e12c5d1SDavid du Colombier case S_COMMENT: 451*3e12c5d1SDavid du Colombier ++ip; 452*3e12c5d1SDavid du Colombier tp->t = ip; 453*3e12c5d1SDavid du Colombier tp->t[-1] = ' '; 454*3e12c5d1SDavid du Colombier tp->wslen = 1; 455*3e12c5d1SDavid du Colombier state = START; 456*3e12c5d1SDavid du Colombier continue; 457*3e12c5d1SDavid du Colombier } 458*3e12c5d1SDavid du Colombier break; 459*3e12c5d1SDavid du Colombier } 460*3e12c5d1SDavid du Colombier ip += runelen; 461*3e12c5d1SDavid du Colombier runelen = 1; 462*3e12c5d1SDavid du Colombier tp->len = ip - tp->t; 463*3e12c5d1SDavid du Colombier tp++; 464*3e12c5d1SDavid du Colombier } 465*3e12c5d1SDavid du Colombier } 466*3e12c5d1SDavid du Colombier 467*3e12c5d1SDavid du Colombier /* have seen ?; handle the 
trigraph it starts (if any) else 0 */ 468*3e12c5d1SDavid du Colombier int 469*3e12c5d1SDavid du Colombier trigraph(Source *s) 470*3e12c5d1SDavid du Colombier { 471*3e12c5d1SDavid du Colombier int c; 472*3e12c5d1SDavid du Colombier 473*3e12c5d1SDavid du Colombier while (s->inp+2 >= s->inl && fillbuf(s)!=EOF) 474*3e12c5d1SDavid du Colombier ; 475*3e12c5d1SDavid du Colombier if (s->inp[1]!='?') 476*3e12c5d1SDavid du Colombier return 0; 477*3e12c5d1SDavid du Colombier c = 0; 478*3e12c5d1SDavid du Colombier switch(s->inp[2]) { 479*3e12c5d1SDavid du Colombier case '=': 480*3e12c5d1SDavid du Colombier c = '#'; break; 481*3e12c5d1SDavid du Colombier case '(': 482*3e12c5d1SDavid du Colombier c = '['; break; 483*3e12c5d1SDavid du Colombier case '/': 484*3e12c5d1SDavid du Colombier c = '\\'; break; 485*3e12c5d1SDavid du Colombier case ')': 486*3e12c5d1SDavid du Colombier c = ']'; break; 487*3e12c5d1SDavid du Colombier case '\'': 488*3e12c5d1SDavid du Colombier c = '^'; break; 489*3e12c5d1SDavid du Colombier case '<': 490*3e12c5d1SDavid du Colombier c = '{'; break; 491*3e12c5d1SDavid du Colombier case '!': 492*3e12c5d1SDavid du Colombier c = '|'; break; 493*3e12c5d1SDavid du Colombier case '>': 494*3e12c5d1SDavid du Colombier c = '}'; break; 495*3e12c5d1SDavid du Colombier case '-': 496*3e12c5d1SDavid du Colombier c = '~'; break; 497*3e12c5d1SDavid du Colombier } 498*3e12c5d1SDavid du Colombier if (c) { 499*3e12c5d1SDavid du Colombier *s->inp = c; 500*3e12c5d1SDavid du Colombier memmove(s->inp+1, s->inp+3, s->inl-s->inp+2); 501*3e12c5d1SDavid du Colombier s->inl -= 2; 502*3e12c5d1SDavid du Colombier } 503*3e12c5d1SDavid du Colombier return c; 504*3e12c5d1SDavid du Colombier } 505*3e12c5d1SDavid du Colombier 506*3e12c5d1SDavid du Colombier int 507*3e12c5d1SDavid du Colombier foldline(Source *s) 508*3e12c5d1SDavid du Colombier { 509*3e12c5d1SDavid du Colombier while (s->inp+1 >= s->inl && fillbuf(s)!=EOF) 510*3e12c5d1SDavid du Colombier ; 511*3e12c5d1SDavid du Colombier if 
(s->inp[1] == '\n') { 512*3e12c5d1SDavid du Colombier memmove(s->inp, s->inp+2, s->inl-s->inp+3); 513*3e12c5d1SDavid du Colombier s->inl -= 2; 514*3e12c5d1SDavid du Colombier return 1; 515*3e12c5d1SDavid du Colombier } 516*3e12c5d1SDavid du Colombier return 0; 517*3e12c5d1SDavid du Colombier } 518*3e12c5d1SDavid du Colombier 519*3e12c5d1SDavid du Colombier int 520*3e12c5d1SDavid du Colombier fillbuf(Source *s) 521*3e12c5d1SDavid du Colombier { 522*3e12c5d1SDavid du Colombier int n; 523*3e12c5d1SDavid du Colombier 524*3e12c5d1SDavid du Colombier if (s->fd<0) 525*3e12c5d1SDavid du Colombier n = 0; 526*3e12c5d1SDavid du Colombier else if ((n=read(s->fd, (char *)s->inl, INS/8)) <= 0) 527*3e12c5d1SDavid du Colombier n = 0; 528*3e12c5d1SDavid du Colombier s->inl += n; 529*3e12c5d1SDavid du Colombier s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB; 530*3e12c5d1SDavid du Colombier if (n==0) { 531*3e12c5d1SDavid du Colombier s->inl[0] = EOF; 532*3e12c5d1SDavid du Colombier return EOF; 533*3e12c5d1SDavid du Colombier } 534*3e12c5d1SDavid du Colombier return 0; 535*3e12c5d1SDavid du Colombier } 536*3e12c5d1SDavid du Colombier 537*3e12c5d1SDavid du Colombier /* 538*3e12c5d1SDavid du Colombier * Push down to new source of characters. 539*3e12c5d1SDavid du Colombier * If fd>0 and str==NULL, then from a file `name'; 540*3e12c5d1SDavid du Colombier * if fd==-1 and str, then from the string. 
541*3e12c5d1SDavid du Colombier */ 542*3e12c5d1SDavid du Colombier Source * 543*3e12c5d1SDavid du Colombier setsource(char *name, int fd, char *str) 544*3e12c5d1SDavid du Colombier { 545*3e12c5d1SDavid du Colombier Source *s = new(Source); 546*3e12c5d1SDavid du Colombier int len; 547*3e12c5d1SDavid du Colombier 548*3e12c5d1SDavid du Colombier s->line = 1; 549*3e12c5d1SDavid du Colombier s->lineinc = 0; 550*3e12c5d1SDavid du Colombier s->fd = fd; 551*3e12c5d1SDavid du Colombier s->filename = name; 552*3e12c5d1SDavid du Colombier /* slop at right for EOB */ 553*3e12c5d1SDavid du Colombier if (str) { 554*3e12c5d1SDavid du Colombier len = strlen(str); 555*3e12c5d1SDavid du Colombier s->inb = domalloc(len+4); 556*3e12c5d1SDavid du Colombier s->inp = s->inb; 557*3e12c5d1SDavid du Colombier strncpy((char *)s->inp, str, len); 558*3e12c5d1SDavid du Colombier } else { 559*3e12c5d1SDavid du Colombier s->inb = domalloc(INS+4); 560*3e12c5d1SDavid du Colombier s->inp = s->inb; 561*3e12c5d1SDavid du Colombier len = 0; 562*3e12c5d1SDavid du Colombier } 563*3e12c5d1SDavid du Colombier s->inl = s->inp+len; 564*3e12c5d1SDavid du Colombier s->inl[0] = s->inl[1] = EOB; 565*3e12c5d1SDavid du Colombier s->next = cursource; 566*3e12c5d1SDavid du Colombier s->ifdepth = 0; 567*3e12c5d1SDavid du Colombier cursource = s; 568*3e12c5d1SDavid du Colombier return s; 569*3e12c5d1SDavid du Colombier } 570*3e12c5d1SDavid du Colombier 571*3e12c5d1SDavid du Colombier void 572*3e12c5d1SDavid du Colombier unsetsource(void) 573*3e12c5d1SDavid du Colombier { 574*3e12c5d1SDavid du Colombier Source *s = cursource; 575*3e12c5d1SDavid du Colombier 576*3e12c5d1SDavid du Colombier if (s->fd>=0) { 577*3e12c5d1SDavid du Colombier close(s->fd); 578*3e12c5d1SDavid du Colombier dofree(s->inb); 579*3e12c5d1SDavid du Colombier } 580*3e12c5d1SDavid du Colombier cursource = s->next; 581*3e12c5d1SDavid du Colombier dofree(s); 582*3e12c5d1SDavid du Colombier } 583