1*37da2899SCharles.Forsythstrhas(s: string, c: int): ref Val 2*37da2899SCharles.Forsyth{ 3*37da2899SCharles.Forsyth for(i := 0; i < len s; i++) 4*37da2899SCharles.Forsyth if(s[i] == c) 5*37da2899SCharles.Forsyth return true; 6*37da2899SCharles.Forsyth return false; 7*37da2899SCharles.Forsyth} 8*37da2899SCharles.Forsyth 9*37da2899SCharles.Forsythrsplit(r: string): (string, string) 10*37da2899SCharles.Forsyth{ 11*37da2899SCharles.Forsyth esc := 0; 12*37da2899SCharles.Forsyth i := 1; # skip '/' 13*37da2899SCharles.Forsyth for(;;){ 14*37da2899SCharles.Forsyth c := r[i++]; 15*37da2899SCharles.Forsyth if(!esc && c == '/') 16*37da2899SCharles.Forsyth break; 17*37da2899SCharles.Forsyth esc = !esc && c == '\\'; 18*37da2899SCharles.Forsyth } 19*37da2899SCharles.Forsyth return (r[1: i-1], r[i: ]); 20*37da2899SCharles.Forsyth} 21*37da2899SCharles.Forsyth 22*37da2899SCharles.Forsythbadflags(f: string): int 23*37da2899SCharles.Forsyth{ 24*37da2899SCharles.Forsyth g := i := m := 0; 25*37da2899SCharles.Forsyth for(j := 0; j < len f; j++){ 26*37da2899SCharles.Forsyth case(f[j]){ 27*37da2899SCharles.Forsyth 'g' => 28*37da2899SCharles.Forsyth g++; 29*37da2899SCharles.Forsyth 'i' => 30*37da2899SCharles.Forsyth i++; 31*37da2899SCharles.Forsyth 'm' => 32*37da2899SCharles.Forsyth m++; 33*37da2899SCharles.Forsyth * => 34*37da2899SCharles.Forsyth return 1; 35*37da2899SCharles.Forsyth } 36*37da2899SCharles.Forsyth } 37*37da2899SCharles.Forsyth return g > 1 || i > 1 || m > 1; 38*37da2899SCharles.Forsyth} 39*37da2899SCharles.Forsyth 40*37da2899SCharles.Forsythregexpvals(ex: ref Exec, v: ref Val, o: ref Ecmascript->Obj): (string, string, int) 41*37da2899SCharles.Forsyth{ 42*37da2899SCharles.Forsyth if(v != nil){ 43*37da2899SCharles.Forsyth if(v.ty == TRegExp) 44*37da2899SCharles.Forsyth return (v.rev.p, v.rev.f, v.rev.i); 45*37da2899SCharles.Forsyth o = v.obj; 46*37da2899SCharles.Forsyth } 47*37da2899SCharles.Forsyth p := toString(ex, esget(ex, o, "source", 0)); 48*37da2899SCharles.Forsyth f := ""; 49*37da2899SCharles.Forsyth if(toBoolean(ex, esget(ex, o, "global", 0)) == true) 50*37da2899SCharles.Forsyth f += "g"; 51*37da2899SCharles.Forsyth if(toBoolean(ex, esget(ex, o, "ignoreCase", 0)) == true) 52*37da2899SCharles.Forsyth f += "i"; 53*37da2899SCharles.Forsyth if(toBoolean(ex, esget(ex, o, "multiline", 0)) == true) 54*37da2899SCharles.Forsyth f += "m"; 55*37da2899SCharles.Forsyth i := toInt32(ex, esget(ex, o, "lastIndex", 0)); 56*37da2899SCharles.Forsyth return (p, f, i); 57*37da2899SCharles.Forsyth} 58*37da2899SCharles.Forsyth 59*37da2899SCharles.Forsythnregexp(ex: ref Exec, nil: ref Ecmascript->Obj, args: array of ref Val): ref Ecmascript->Obj 60*37da2899SCharles.Forsyth{ 61*37da2899SCharles.Forsyth pat := biarg(args, 0); 62*37da2899SCharles.Forsyth flags := biarg(args, 1); 63*37da2899SCharles.Forsyth (p, f) := ("", ""); 64*37da2899SCharles.Forsyth if(isregexp(pat)){ 65*37da2899SCharles.Forsyth if(flags == undefined) 66*37da2899SCharles.Forsyth (p, f, nil) = regexpvals(ex, pat, nil); 67*37da2899SCharles.Forsyth else 68*37da2899SCharles.Forsyth runtime(ex, TypeError, "flags defined"); 69*37da2899SCharles.Forsyth } 70*37da2899SCharles.Forsyth else{ 71*37da2899SCharles.Forsyth if(pat == undefined) 72*37da2899SCharles.Forsyth p = ""; 73*37da2899SCharles.Forsyth else 74*37da2899SCharles.Forsyth p = toString(ex, pat); 75*37da2899SCharles.Forsyth if(flags == undefined) 76*37da2899SCharles.Forsyth f = ""; 77*37da2899SCharles.Forsyth else 78*37da2899SCharles.Forsyth f = toString(ex, flags); 79*37da2899SCharles.Forsyth } 80*37da2899SCharles.Forsyth o := nobj(ex, nil, array[] of { regexpval(p, f, 0) }); 81*37da2899SCharles.Forsyth if(badflags(f)) 82*37da2899SCharles.Forsyth runtime(ex, SyntaxError, "bad regexp flags"); 83*37da2899SCharles.Forsyth regex = ex; 84*37da2899SCharles.Forsyth (re, err) := compile(p, 1); 85*37da2899SCharles.Forsyth if(re == nil || err != nil) 86*37da2899SCharles.Forsyth runtime(ex, SyntaxError, "bad regexp pattern"); 87*37da2899SCharles.Forsyth o.re = re; 88*37da2899SCharles.Forsyth return o; 89*37da2899SCharles.Forsyth} 90*37da2899SCharles.Forsyth 91*37da2899SCharles.Forsythcregexp(ex: ref Exec, f, nil: ref Ecmascript->Obj, args: array of ref Val): ref Val 92*37da2899SCharles.Forsyth{ 93*37da2899SCharles.Forsyth pat := biarg(args, 0); 94*37da2899SCharles.Forsyth flags := biarg(args, 1); 95*37da2899SCharles.Forsyth if(isregexp(pat) && flags == undefined) 96*37da2899SCharles.Forsyth return pat; 97*37da2899SCharles.Forsyth return objval(nregexp(ex, f, args)); 98*37da2899SCharles.Forsyth} 99*37da2899SCharles.Forsyth 100*37da2899SCharles.Forsythcregexpprotoexec(ex: ref Exec, f, this: ref Ecmascript->Obj, args: array of ref Val): ref Val 101*37da2899SCharles.Forsyth{ 102*37da2899SCharles.Forsyth m: array of (int, int); 103*37da2899SCharles.Forsyth 104*37da2899SCharles.Forsyth regexpcheck(ex, this, f); 105*37da2899SCharles.Forsyth s := toString(ex, biarg(args, 0)); 106*37da2899SCharles.Forsyth l := len s; 107*37da2899SCharles.Forsyth i := toInt32(ex, esget(ex, this, "lastIndex", 0)); 108*37da2899SCharles.Forsyth e := 0; 109*37da2899SCharles.Forsyth glob := esget(ex, this, "global", 0); 110*37da2899SCharles.Forsyth multiline := esget(ex, this, "multiline", 0); 111*37da2899SCharles.Forsyth ignorecase := esget(ex, this, "ignoreCase", 0); 112*37da2899SCharles.Forsyth if(glob == false) 113*37da2899SCharles.Forsyth i = 0; 114*37da2899SCharles.Forsyth for(;;){ 115*37da2899SCharles.Forsyth if(i < 0 || i >= l){ 116*37da2899SCharles.Forsyth esput(ex, this, "lastIndex", numval(real 0), 0); 117*37da2899SCharles.Forsyth return null; 118*37da2899SCharles.Forsyth } 119*37da2899SCharles.Forsyth regex = ex; 120*37da2899SCharles.Forsyth m = executese(this.re, s, (i, len s), i == 0, 1, multiline == true, ignorecase == true); 121*37da2899SCharles.Forsyth if(m != nil) 122*37da2899SCharles.Forsyth break; 123*37da2899SCharles.Forsyth i++; 124*37da2899SCharles.Forsyth i = -1; # no need to loop with executese 125*37da2899SCharles.Forsyth } 126*37da2899SCharles.Forsyth (i, e) = m[0]; 127*37da2899SCharles.Forsyth if(glob == true) 128*37da2899SCharles.Forsyth esput(ex, this, "lastIndex", numval(real e), 0); 129*37da2899SCharles.Forsyth n := len m; 130*37da2899SCharles.Forsyth av := array[n] of ref Val; 131*37da2899SCharles.Forsyth for(j := 0; j < n; j++){ 132*37da2899SCharles.Forsyth (a, b) := m[j]; 133*37da2899SCharles.Forsyth if(a < 0) 134*37da2899SCharles.Forsyth av[j] = undefined; 135*37da2899SCharles.Forsyth else 136*37da2899SCharles.Forsyth av[j] = strval(s[a: b]); 137*37da2899SCharles.Forsyth } 138*37da2899SCharles.Forsyth a := narray(ex, nil, av); 139*37da2899SCharles.Forsyth esput(ex, a, "index", numval(real i), 0); 140*37da2899SCharles.Forsyth esput(ex, a, "input", strval(s), 0); 141*37da2899SCharles.Forsyth return objval(a); 142*37da2899SCharles.Forsyth} 143*37da2899SCharles.Forsyth 144*37da2899SCharles.Forsythcregexpprototest(ex: ref Exec, f, this: ref Ecmascript->Obj, args: array of ref Val): ref Val 145*37da2899SCharles.Forsyth{ 146*37da2899SCharles.Forsyth regexpcheck(ex, this, f); 147*37da2899SCharles.Forsyth v := cregexpprotoexec(ex, f, this, args); 148*37da2899SCharles.Forsyth if(!isnull(v)) 149*37da2899SCharles.Forsyth return true; 150*37da2899SCharles.Forsyth return false; 151*37da2899SCharles.Forsyth} 152*37da2899SCharles.Forsyth 153*37da2899SCharles.ForsythcregexpprototoString(ex: ref Exec, f, this: ref Ecmascript->Obj, nil: array of ref Val): ref Val 154*37da2899SCharles.Forsyth{ 155*37da2899SCharles.Forsyth regexpcheck(ex, this, f); 156*37da2899SCharles.Forsyth (p, fl, nil) := regexpvals(ex, nil, this); 157*37da2899SCharles.Forsyth return strval("/" + p + "/" + fl); 158*37da2899SCharles.Forsyth} 159*37da2899SCharles.Forsyth 160*37da2899SCharles.Forsythregexpcheck(ex: ref Exec, o: ref Ecmascript->Obj, f: ref Obj) 161*37da2899SCharles.Forsyth{ 162*37da2899SCharles.Forsyth if(f == nil) 163*37da2899SCharles.Forsyth s := "exec"; 164*37da2899SCharles.Forsyth else 165*37da2899SCharles.Forsyth s = f.val.str; 166*37da2899SCharles.Forsyth if(!isregexpobj(o)) 167*37da2899SCharles.Forsyth runtime(ex, TypeError, "RegExp.prototype." + s + " called on non-RegExp object"); 168*37da2899SCharles.Forsyth} 169*37da2899SCharles.Forsyth 170*37da2899SCharles.Forsythcstrprotomatch(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val 171*37da2899SCharles.Forsyth{ 172*37da2899SCharles.Forsyth v := biarg(args, 0); 173*37da2899SCharles.Forsyth if(!isregexp(v)) 174*37da2899SCharles.Forsyth re := nregexp(ex, nil, args); 175*37da2899SCharles.Forsyth else if(v.ty == TObj) 176*37da2899SCharles.Forsyth re = v.obj; 177*37da2899SCharles.Forsyth else 178*37da2899SCharles.Forsyth re = nobj(ex, nil, args); 179*37da2899SCharles.Forsyth s := toString(ex, this.val); 180*37da2899SCharles.Forsyth glob := esget(ex, re, "global", 0); 181*37da2899SCharles.Forsyth av := array[1] of ref Val; 182*37da2899SCharles.Forsyth av[0] = strval(s); 183*37da2899SCharles.Forsyth if(glob == false) 184*37da2899SCharles.Forsyth return cregexpprotoexec(ex, nil, re, av); 185*37da2899SCharles.Forsyth li := 0; 186*37da2899SCharles.Forsyth esput(ex, re, "lastIndex", numval(real li), 0); 187*37da2899SCharles.Forsyth ms: list of ref Val; 188*37da2899SCharles.Forsyth for(;;){ 189*37da2899SCharles.Forsyth v = cregexpprotoexec(ex, nil, re, av); 190*37da2899SCharles.Forsyth if(isnull(v)) 191*37da2899SCharles.Forsyth break; 192*37da2899SCharles.Forsyth ms = esget(ex, v.obj, "0", 0) :: ms; 193*37da2899SCharles.Forsyth ni := int toUint32(ex, esget(ex, re, "lastIndex", 0)); 194*37da2899SCharles.Forsyth if(ni == li) 195*37da2899SCharles.Forsyth esput(ex, re, "lastIndex", numval(real ++li), 0); 196*37da2899SCharles.Forsyth else 197*37da2899SCharles.Forsyth li = ni; 198*37da2899SCharles.Forsyth } 199*37da2899SCharles.Forsyth n := len ms; 200*37da2899SCharles.Forsyth av = array[n] of ref Val; 201*37da2899SCharles.Forsyth for(j := n-1; j >= 0; j--){ 202*37da2899SCharles.Forsyth av[j] = hd ms; 203*37da2899SCharles.Forsyth ms = tl ms; 204*37da2899SCharles.Forsyth } 205*37da2899SCharles.Forsyth return objval(narray(ex, nil, av)); 206*37da2899SCharles.Forsyth} 207*37da2899SCharles.Forsyth 208*37da2899SCharles.Forsythcstrprotoreplace(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val 209*37da2899SCharles.Forsyth{ 210*37da2899SCharles.Forsyth re: ref Ecmascript->Obj; 211*37da2899SCharles.Forsyth 212*37da2899SCharles.Forsyth v := biarg(args, 0); 213*37da2899SCharles.Forsyth rege := isregexp(v); 214*37da2899SCharles.Forsyth if(!rege){ 215*37da2899SCharles.Forsyth if(args == nil) 216*37da2899SCharles.Forsyth re = nregexp(ex, nil, args); 217*37da2899SCharles.Forsyth else 218*37da2899SCharles.Forsyth re = nregexp(ex, nil, args[0:1]); 219*37da2899SCharles.Forsyth } 220*37da2899SCharles.Forsyth else if(v.ty == TObj) 221*37da2899SCharles.Forsyth re = v.obj; 222*37da2899SCharles.Forsyth else 223*37da2899SCharles.Forsyth re = nobj(ex, nil, args); 224*37da2899SCharles.Forsyth s := toString(ex, this.val); 225*37da2899SCharles.Forsyth if(rege) 226*37da2899SCharles.Forsyth glob := esget(ex, re, "global", 0); 227*37da2899SCharles.Forsyth else 228*37da2899SCharles.Forsyth glob = false; 229*37da2899SCharles.Forsyth av := array[1] of ref Val; 230*37da2899SCharles.Forsyth av[0] = strval(s); 231*37da2899SCharles.Forsyth ms: list of ref Val; 232*37da2899SCharles.Forsyth li := 0; 233*37da2899SCharles.Forsyth if(glob == true) 234*37da2899SCharles.Forsyth esput(ex, re, "lastIndex", numval(real li), 0); 235*37da2899SCharles.Forsyth for(;;){ 236*37da2899SCharles.Forsyth v = cregexpprotoexec(ex, nil, re, av); 237*37da2899SCharles.Forsyth if(!isnull(v)) 238*37da2899SCharles.Forsyth ms = v :: ms; 239*37da2899SCharles.Forsyth if(isnull(v) || glob == false) 240*37da2899SCharles.Forsyth break; 241*37da2899SCharles.Forsyth ni := int toUint32(ex, esget(ex, re, "lastIndex", 0)); 242*37da2899SCharles.Forsyth if(ni == li) 243*37da2899SCharles.Forsyth esput(ex, re, "lastIndex", numval(real ++li), 0); 244*37da2899SCharles.Forsyth else 245*37da2899SCharles.Forsyth li = ni; 246*37da2899SCharles.Forsyth } 247*37da2899SCharles.Forsyth if(ms == nil) 248*37da2899SCharles.Forsyth return strval(s); 249*37da2899SCharles.Forsyth ms = rev(ms); 250*37da2899SCharles.Forsyth if(rege) 251*37da2899SCharles.Forsyth lcp := int toUint32(ex, esget(ex, (hd ms).obj, "length", 0))-1; 252*37da2899SCharles.Forsyth else 253*37da2899SCharles.Forsyth lcp = 0; 254*37da2899SCharles.Forsyth v = biarg(args, 1); 255*37da2899SCharles.Forsyth if(isobj(v) && isfuncobj(v.obj)){ 256*37da2899SCharles.Forsyth ns := s; 257*37da2899SCharles.Forsyth n := len ms; 258*37da2899SCharles.Forsyth args = array[lcp+3] of ref Val; 259*37da2899SCharles.Forsyth o := inc := 0; 260*37da2899SCharles.Forsyth for(i := 0; i < n; i++){ 261*37da2899SCharles.Forsyth a := (hd ms).obj; 262*37da2899SCharles.Forsyth ms = tl ms; 263*37da2899SCharles.Forsyth for(j := 0; j <= lcp; j++) 264*37da2899SCharles.Forsyth args[j] = esget(ex, a, string j, 0); 265*37da2899SCharles.Forsyth ss := toString(ex, args[0]); 266*37da2899SCharles.Forsyth o = offset(ss, s, o); 267*37da2899SCharles.Forsyth args[lcp+1] = numval(real o); 268*37da2899SCharles.Forsyth args[lcp+2] = strval(s); 269*37da2899SCharles.Forsyth rs := toString(ex, getValue(ex, escall(ex, v.obj, nil, args, 0))); 270*37da2899SCharles.Forsyth ns = repl(ns, o+inc, o+inc+len ss, rs); 271*37da2899SCharles.Forsyth o += len ss; 272*37da2899SCharles.Forsyth inc += len rs - len ss; 273*37da2899SCharles.Forsyth } 274*37da2899SCharles.Forsyth return strval(ns); 275*37da2899SCharles.Forsyth } 276*37da2899SCharles.Forsyth else{ 277*37da2899SCharles.Forsyth ps := toString(ex, v); 278*37da2899SCharles.Forsyth lps := len ps; 279*37da2899SCharles.Forsyth ns := s; 280*37da2899SCharles.Forsyth n := len ms; 281*37da2899SCharles.Forsyth o := inc := 0; 282*37da2899SCharles.Forsyth for(i := 0; i < n; i++){ 283*37da2899SCharles.Forsyth a := (hd ms).obj; 284*37da2899SCharles.Forsyth ms = tl ms; 285*37da2899SCharles.Forsyth ss := toString(ex, esget(ex, a, "0", 0)); 286*37da2899SCharles.Forsyth o = offset(ss, s, o); 287*37da2899SCharles.Forsyth rs := ""; 288*37da2899SCharles.Forsyth for(j := 0; j < lps; j++){ 289*37da2899SCharles.Forsyth if(ps[j] == '$' && j < lps-1){ 290*37da2899SCharles.Forsyth j++; 291*37da2899SCharles.Forsyth case(c := ps[j]){ 292*37da2899SCharles.Forsyth '$' => 293*37da2899SCharles.Forsyth rs += "$"; 294*37da2899SCharles.Forsyth '&' => 295*37da2899SCharles.Forsyth rs += ss; 296*37da2899SCharles.Forsyth '`' => 297*37da2899SCharles.Forsyth rs += s[0: o]; 298*37da2899SCharles.Forsyth ''' => 299*37da2899SCharles.Forsyth rs += s[o+len ss: ]; 300*37da2899SCharles.Forsyth '0' to '9' => 301*37da2899SCharles.Forsyth if(j < lps-1 && isdigit(ps[j+1])) 302*37da2899SCharles.Forsyth c = 10*(c-'0')+ps[++j]-'0'; 303*37da2899SCharles.Forsyth else 304*37da2899SCharles.Forsyth c = c-'0'; 305*37da2899SCharles.Forsyth if(c >= 1 && c <= lcp) 306*37da2899SCharles.Forsyth rs += toString(ex, esget(ex, a, string c, 0)); 307*37da2899SCharles.Forsyth } 308*37da2899SCharles.Forsyth } 309*37da2899SCharles.Forsyth else 310*37da2899SCharles.Forsyth rs += ps[j: j+1]; 311*37da2899SCharles.Forsyth } 312*37da2899SCharles.Forsyth ns = repl(ns, o+inc, o+inc+len ss, rs); 313*37da2899SCharles.Forsyth o += len ss; 314*37da2899SCharles.Forsyth inc += len rs - len ss; 315*37da2899SCharles.Forsyth } 316*37da2899SCharles.Forsyth return strval(ns); 317*37da2899SCharles.Forsyth } 318*37da2899SCharles.Forsyth} 319*37da2899SCharles.Forsyth 320*37da2899SCharles.Forsythcstrprotosearch(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val 321*37da2899SCharles.Forsyth{ 322*37da2899SCharles.Forsyth v := biarg(args, 0); 323*37da2899SCharles.Forsyth if(!isregexp(v)) 324*37da2899SCharles.Forsyth re := nregexp(ex, nil, args); 325*37da2899SCharles.Forsyth else if(v.ty == TObj) 326*37da2899SCharles.Forsyth re = v.obj; 327*37da2899SCharles.Forsyth else 328*37da2899SCharles.Forsyth re = nobj(ex, nil, args); 329*37da2899SCharles.Forsyth s := toString(ex, this.val); 330*37da2899SCharles.Forsyth glob := esget(ex, re, "global", 0); 331*37da2899SCharles.Forsyth esput(ex, re, "global", false, 0); 332*37da2899SCharles.Forsyth av := array[1] of ref Val; 333*37da2899SCharles.Forsyth av[0] = strval(s); 334*37da2899SCharles.Forsyth v = cregexpprotoexec(ex, nil, re, av); 335*37da2899SCharles.Forsyth if(isnull(v)) 336*37da2899SCharles.Forsyth r := -1; 337*37da2899SCharles.Forsyth else{ 338*37da2899SCharles.Forsyth ss := toString(ex, esget(ex, v.obj, "0", 0)); 339*37da2899SCharles.Forsyth r = offset(ss, s, 0); 340*37da2899SCharles.Forsyth } 341*37da2899SCharles.Forsyth esput(ex, re, "global", glob, 0); 342*37da2899SCharles.Forsyth return numval(real r); 343*37da2899SCharles.Forsyth} 344*37da2899SCharles.Forsyth 345*37da2899SCharles.Forsythoffset(ss: string, s: string, m: int): int 346*37da2899SCharles.Forsyth{ 347*37da2899SCharles.Forsyth nn := len ss; 348*37da2899SCharles.Forsyth n := len s; 349*37da2899SCharles.Forsyth for(i := m; i <= n-nn; i++){ 350*37da2899SCharles.Forsyth if(s[i: i+nn] == ss) 351*37da2899SCharles.Forsyth return i; 352*37da2899SCharles.Forsyth } 353*37da2899SCharles.Forsyth return -1; 354*37da2899SCharles.Forsyth} 355*37da2899SCharles.Forsyth 356*37da2899SCharles.Forsythrepl(s: string, a: int, b: int, ns: string): string 357*37da2899SCharles.Forsyth{ 358*37da2899SCharles.Forsyth return s[0: a] + ns + s[b: ]; 359*37da2899SCharles.Forsyth} 360*37da2899SCharles.Forsyth 361*37da2899SCharles.Forsythrev(ls: list of ref Val): list of ref Val 362*37da2899SCharles.Forsyth{ 363*37da2899SCharles.Forsyth ns: list of ref Val; 364*37da2899SCharles.Forsyth 365*37da2899SCharles.Forsyth for( ; ls != nil; ls = tl ls) 366*37da2899SCharles.Forsyth ns = hd ls :: ns; 367*37da2899SCharles.Forsyth return ns; 368*37da2899SCharles.Forsyth} 369*37da2899SCharles.Forsyth 370*37da2899SCharles.Forsyth######################################################################### 371*37da2899SCharles.Forsyth# regex.b originally 372*37da2899SCharles.Forsyth 373*37da2899SCharles.Forsyth# normally imported identifiers 374*37da2899SCharles.Forsyth 375*37da2899SCharles.Forsyth# internal identifiers, not normally imported 376*37da2899SCharles.Forsyth 377*37da2899SCharles.ForsythALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN, LPN0, RPN0, LPN1, RPN1, LPN2, RPN2, BEET, BEEF, MNCLO, LCP, IDLE: con (1<<16)+iota; 378*37da2899SCharles.Forsyth 379*37da2899SCharles.Forsyth# syntax 380*37da2899SCharles.Forsyth 381*37da2899SCharles.Forsyth# RE ALT regular expression 382*37da2899SCharles.Forsyth# NUL 383*37da2899SCharles.Forsyth# ALT CAT alternation 384*37da2899SCharles.Forsyth# CAT | ALT 385*37da2899SCharles.Forsyth# 386*37da2899SCharles.Forsyth# CAT DUP catenation 387*37da2899SCharles.Forsyth# DUP CAT 388*37da2899SCharles.Forsyth# 389*37da2899SCharles.Forsyth# DUP PRIM possibly duplicated primary 390*37da2899SCharles.Forsyth# PCLO 391*37da2899SCharles.Forsyth# CLO 392*37da2899SCharles.Forsyth# OPT 393*37da2899SCharles.Forsyth# 394*37da2899SCharles.Forsyth# PCLO PRIM + 1 or more 395*37da2899SCharles.Forsyth# CLO PRIM * 0 or more 396*37da2899SCharles.Forsyth# OPT PRIM ? 0 or 1 397*37da2899SCharles.Forsyth# 398*37da2899SCharles.Forsyth# PRIM ( RE ) 399*37da2899SCharles.Forsyth# () 400*37da2899SCharles.Forsyth# DOT any character 401*37da2899SCharles.Forsyth# CHAR a single character 402*37da2899SCharles.Forsyth# ESC escape sequence 403*37da2899SCharles.Forsyth# [ SET ] character set 404*37da2899SCharles.Forsyth# NUL null string 405*37da2899SCharles.Forsyth# HAT beginning of string 406*37da2899SCharles.Forsyth# DOL end of string 407*37da2899SCharles.Forsyth# 408*37da2899SCharles.Forsyth 409*37da2899SCharles.Forsythregex: ref Exec; 410*37da2899SCharles.Forsyth 411*37da2899SCharles.ForsythNIL : con -1; # a refRex constant 412*37da2899SCharles.ForsythNONE: con -2; # ditto, for an un-set value 413*37da2899SCharles.ForsythBAD: con 1<<16; # a non-character 414*37da2899SCharles.ForsythHUGE: con (1<<31) - 1; 415*37da2899SCharles.Forsyth 416*37da2899SCharles.Forsyth# the data structures of re.m would like to be ref-linked, but are 417*37da2899SCharles.Forsyth# circular (see fn walk), thus instead of pointers we use indexes 418*37da2899SCharles.Forsyth# into an array (arena) of nodes of the syntax tree of a regular expression. 419*37da2899SCharles.Forsyth# from a storage-allocation standpoint, this replaces many small 420*37da2899SCharles.Forsyth# allocations of one size with one big one of variable size. 421*37da2899SCharles.Forsyth 422*37da2899SCharles.ForsythReStr: adt { 423*37da2899SCharles.Forsyth s : string; 424*37da2899SCharles.Forsyth i : int; # cursor postion 425*37da2899SCharles.Forsyth n : int; # number of chars left; -1 on error 426*37da2899SCharles.Forsyth peek : fn(s: self ref ReStr): int; 427*37da2899SCharles.Forsyth next : fn(s: self ref ReStr): int; 428*37da2899SCharles.Forsyth unput: fn(s: self ref ReStr); 429*37da2899SCharles.Forsyth}; 430*37da2899SCharles.Forsyth 431*37da2899SCharles.ForsythReStr.peek(s: self ref ReStr): int 432*37da2899SCharles.Forsyth{ 433*37da2899SCharles.Forsyth if(s.n <= 0) 434*37da2899SCharles.Forsyth return BAD; 435*37da2899SCharles.Forsyth return s.s[s.i]; 436*37da2899SCharles.Forsyth} 437*37da2899SCharles.Forsyth 438*37da2899SCharles.ForsythReStr.next(s: self ref ReStr): int 439*37da2899SCharles.Forsyth{ 440*37da2899SCharles.Forsyth if(s.n <= 0) 441*37da2899SCharles.Forsyth syntax("bad regular expression"); 442*37da2899SCharles.Forsyth s.n--; 443*37da2899SCharles.Forsyth return s.s[s.i++]; 444*37da2899SCharles.Forsyth} 445*37da2899SCharles.Forsyth 446*37da2899SCharles.ForsythReStr.unput(s: self ref ReStr) 447*37da2899SCharles.Forsyth{ 448*37da2899SCharles.Forsyth s.n++; 449*37da2899SCharles.Forsyth s.i--; 450*37da2899SCharles.Forsyth} 451*37da2899SCharles.Forsyth 452*37da2899SCharles.ForsythnewRe(kind: int, left, right: refRex, set: ref Set, ar: ref Arena, pno: int, greedy: int): refRex 453*37da2899SCharles.Forsyth{ 454*37da2899SCharles.Forsyth ar.rex[ar.ptr] = Rex(kind, left, right, set, pno, greedy, nil); 455*37da2899SCharles.Forsyth return ar.ptr++; 456*37da2899SCharles.Forsyth} 457*37da2899SCharles.Forsyth 458*37da2899SCharles.Forsyth# parse a regex by recursive descent to get a syntax tree 459*37da2899SCharles.Forsyth 460*37da2899SCharles.Forsythre(s: ref ReStr, ar: ref Arena): refRex 461*37da2899SCharles.Forsyth{ 462*37da2899SCharles.Forsyth left := cat(s, ar); 463*37da2899SCharles.Forsyth if(left==NIL || s.peek()!='|') 464*37da2899SCharles.Forsyth return left; 465*37da2899SCharles.Forsyth s.next(); 466*37da2899SCharles.Forsyth right := re(s, ar); 467*37da2899SCharles.Forsyth if(right == NIL) 468*37da2899SCharles.Forsyth return NIL; 469*37da2899SCharles.Forsyth return newRe(ALT, left, right, nil, ar, 0, 0); 470*37da2899SCharles.Forsyth} 471*37da2899SCharles.Forsyth 472*37da2899SCharles.Forsythcat(s: ref ReStr, ar: ref Arena): refRex 473*37da2899SCharles.Forsyth{ 474*37da2899SCharles.Forsyth left := dup(s, ar); 475*37da2899SCharles.Forsyth if(left == NIL) 476*37da2899SCharles.Forsyth return left; 477*37da2899SCharles.Forsyth right := cat(s, ar); 478*37da2899SCharles.Forsyth if(right == NIL) 479*37da2899SCharles.Forsyth return left; 480*37da2899SCharles.Forsyth return newRe(CAT, left, right, nil, ar, 0, 0); 481*37da2899SCharles.Forsyth} 482*37da2899SCharles.Forsyth 483*37da2899SCharles.Forsythdup(s: ref ReStr, ar: ref Arena): refRex 484*37da2899SCharles.Forsyth{ 485*37da2899SCharles.Forsyth n1, n2: int; 486*37da2899SCharles.Forsyth 487*37da2899SCharles.Forsyth case s.peek() { 488*37da2899SCharles.Forsyth BAD or ')' or ']' or '|' or '?' or '*' or '+' => 489*37da2899SCharles.Forsyth return NIL; 490*37da2899SCharles.Forsyth } 491*37da2899SCharles.Forsyth prim: refRex; 492*37da2899SCharles.Forsyth case kind:=s.next() { 493*37da2899SCharles.Forsyth '(' => if(ar.pno < 0) { 494*37da2899SCharles.Forsyth if(s.peek() == ')') { 495*37da2899SCharles.Forsyth s.next(); 496*37da2899SCharles.Forsyth prim = newRe(NUL, NONE, NONE, nil, ar, 0, 0); 497*37da2899SCharles.Forsyth } else { 498*37da2899SCharles.Forsyth prim = re(s, ar); 499*37da2899SCharles.Forsyth if(prim==NIL || s.next()!=')') 500*37da2899SCharles.Forsyth syntax("( with no )"); 501*37da2899SCharles.Forsyth } 502*37da2899SCharles.Forsyth } else { 503*37da2899SCharles.Forsyth pno := ++ar.pno; 504*37da2899SCharles.Forsyth lp := newRe(LPN, NONE, NONE, nil, ar, pno, 0); 505*37da2899SCharles.Forsyth rp := newRe(RPN, NONE, NONE, nil, ar, pno, 0); 506*37da2899SCharles.Forsyth if(s.peek() == ')') { 507*37da2899SCharles.Forsyth s.next(); 508*37da2899SCharles.Forsyth prim = newRe(CAT, lp, rp, nil, ar, 0, 0); 509*37da2899SCharles.Forsyth } else { 510*37da2899SCharles.Forsyth if(s.peek() == '?'){ 511*37da2899SCharles.Forsyth s.next(); 512*37da2899SCharles.Forsyth case s.next(){ 513*37da2899SCharles.Forsyth ':' => ar.rex[lp].kind = LPN0; 514*37da2899SCharles.Forsyth ar.rex[rp].kind = RPN0; 515*37da2899SCharles.Forsyth '=' => ar.rex[lp].kind = LPN1; 516*37da2899SCharles.Forsyth ar.rex[rp].kind = RPN1; 517*37da2899SCharles.Forsyth '!' => ar.rex[lp].kind = LPN2; 518*37da2899SCharles.Forsyth ar.rex[rp].kind = RPN2; 519*37da2899SCharles.Forsyth * => syntax("bad char after ?"); 520*37da2899SCharles.Forsyth } 521*37da2899SCharles.Forsyth } 522*37da2899SCharles.Forsyth prim = re(s, ar); 523*37da2899SCharles.Forsyth if(prim==NIL || s.next()!=')') 524*37da2899SCharles.Forsyth syntax("( with no )"); 525*37da2899SCharles.Forsyth else { 526*37da2899SCharles.Forsyth prim = newRe(CAT, prim, rp, nil, ar, 0, 0); 527*37da2899SCharles.Forsyth prim = newRe(CAT, lp, prim, nil, ar, 0, 0); 528*37da2899SCharles.Forsyth } 529*37da2899SCharles.Forsyth } 530*37da2899SCharles.Forsyth } 531*37da2899SCharles.Forsyth '[' => prim = newRe(SET, NONE, NONE, newSet(s), ar, 0, 0); 532*37da2899SCharles.Forsyth * => case kind { 533*37da2899SCharles.Forsyth '.' => kind = DOT; 534*37da2899SCharles.Forsyth '^' => kind = HAT; 535*37da2899SCharles.Forsyth '$' => kind = DOL; 536*37da2899SCharles.Forsyth } 537*37da2899SCharles.Forsyth (c, set, op) := esc(s, kind, 0); 538*37da2899SCharles.Forsyth if(set != nil) 539*37da2899SCharles.Forsyth prim = newRe(SET, NONE, NONE, set, ar, 0, 0); 540*37da2899SCharles.Forsyth else if(op == LCP){ 541*37da2899SCharles.Forsyth if(c > ar.pno) 542*37da2899SCharles.Forsyth syntax("\num too big"); 543*37da2899SCharles.Forsyth prim = newRe(LCP, NONE, NONE, nil, ar, 0, 0); 544*37da2899SCharles.Forsyth ar.rex[prim].ns = ref Nstate(c, c); 545*37da2899SCharles.Forsyth } 546*37da2899SCharles.Forsyth else 547*37da2899SCharles.Forsyth prim = newRe(c, NONE, NONE, nil, ar, 0, 0); 548*37da2899SCharles.Forsyth } 549*37da2899SCharles.Forsyth case s.peek() { 550*37da2899SCharles.Forsyth '*' => kind = CLO; 551*37da2899SCharles.Forsyth '+' => kind = PCLO; 552*37da2899SCharles.Forsyth '?' => kind = OPT; 553*37da2899SCharles.Forsyth '{' => s.next(); 554*37da2899SCharles.Forsyth (n1, n2) = drange(s); 555*37da2899SCharles.Forsyth kind = MNCLO; 556*37da2899SCharles.Forsyth if(s.peek() != '}') 557*37da2899SCharles.Forsyth syntax("{ with no }"); 558*37da2899SCharles.Forsyth * => return prim; 559*37da2899SCharles.Forsyth } 560*37da2899SCharles.Forsyth s.next(); 561*37da2899SCharles.Forsyth greedy := 1; 562*37da2899SCharles.Forsyth if(s.peek() == '?'){ 563*37da2899SCharles.Forsyth # non-greedy op 564*37da2899SCharles.Forsyth greedy = 0; 565*37da2899SCharles.Forsyth s.next(); 566*37da2899SCharles.Forsyth } 567*37da2899SCharles.Forsyth prim = newRe(kind, prim, NONE, nil, ar, 0, greedy); 568*37da2899SCharles.Forsyth if(kind == MNCLO) 569*37da2899SCharles.Forsyth ns := ar.rex[prim].ns = ref Nstate(n1, n2); 570*37da2899SCharles.Forsyth return prim; 571*37da2899SCharles.Forsyth} 572*37da2899SCharles.Forsyth 573*37da2899SCharles.Forsythesc(s: ref ReStr, char: int, inset: int): (int, ref Set, int) 574*37da2899SCharles.Forsyth{ 575*37da2899SCharles.Forsyth set: ref Set; 576*37da2899SCharles.Forsyth 577*37da2899SCharles.Forsyth op := 0; 578*37da2899SCharles.Forsyth if(char == '\\') { 579*37da2899SCharles.Forsyth char = s.next(); 580*37da2899SCharles.Forsyth case char { 581*37da2899SCharles.Forsyth 'b' => 582*37da2899SCharles.Forsyth if(inset) 583*37da2899SCharles.Forsyth char = '\b'; 584*37da2899SCharles.Forsyth else 585*37da2899SCharles.Forsyth char = BEET; 586*37da2899SCharles.Forsyth 'B' => if(inset) 587*37da2899SCharles.Forsyth syntax("\\B in set"); 588*37da2899SCharles.Forsyth else 589*37da2899SCharles.Forsyth char = BEEF; 590*37da2899SCharles.Forsyth 'f' => char = '\u000c'; 591*37da2899SCharles.Forsyth 'n' => char = '\n'; 592*37da2899SCharles.Forsyth 'r' => char = '\r'; 593*37da2899SCharles.Forsyth 't' => char = '\t'; 594*37da2899SCharles.Forsyth 'v' => char = '\v'; 595*37da2899SCharles.Forsyth '0' to '9' => 596*37da2899SCharles.Forsyth s.unput(); 597*37da2899SCharles.Forsyth char = digits(s); 598*37da2899SCharles.Forsyth if(char == 0) 599*37da2899SCharles.Forsyth char = '\0'; 600*37da2899SCharles.Forsyth else if(inset) 601*37da2899SCharles.Forsyth syntax("\num in set"); 602*37da2899SCharles.Forsyth else 603*37da2899SCharles.Forsyth op = LCP; 604*37da2899SCharles.Forsyth 'x' => char = hexdigits(s, 2); 605*37da2899SCharles.Forsyth 'u' => char = hexdigits(s, 4); 606*37da2899SCharles.Forsyth 'c' => char = s.next()%32; 607*37da2899SCharles.Forsyth 'd' or 'D' => 608*37da2899SCharles.Forsyth set = newset('0', '9'); 609*37da2899SCharles.Forsyth if(char == 'D') 610*37da2899SCharles.Forsyth set.neg = 1; 611*37da2899SCharles.Forsyth 's' or 'S' => 612*37da2899SCharles.Forsyth set = newset(' ', ' '); 613*37da2899SCharles.Forsyth addsets(set, "\t\v\u000c\u00a0\n\r\u2028\u2029"); 614*37da2899SCharles.Forsyth if(char == 'S') 615*37da2899SCharles.Forsyth set.neg = 1; 616*37da2899SCharles.Forsyth 'w' or 'W' => 617*37da2899SCharles.Forsyth set = newset('0', '9'); 618*37da2899SCharles.Forsyth addset(set, 'a', 'z'); 619*37da2899SCharles.Forsyth addset(set, 'A', 'Z'); 620*37da2899SCharles.Forsyth addset(set, '_', '_'); 621*37da2899SCharles.Forsyth if(char == 'W') 622*37da2899SCharles.Forsyth set.neg = 1; 623*37da2899SCharles.Forsyth * => 624*37da2899SCharles.Forsyth ; 625*37da2899SCharles.Forsyth } 626*37da2899SCharles.Forsyth } 627*37da2899SCharles.Forsyth if(char == -1){ 628*37da2899SCharles.Forsyth if(inset) 629*37da2899SCharles.Forsyth syntax("bad set"); 630*37da2899SCharles.Forsyth else 631*37da2899SCharles.Forsyth syntax("bad character"); 632*37da2899SCharles.Forsyth } 633*37da2899SCharles.Forsyth return (char, set, op); 634*37da2899SCharles.Forsyth} 635*37da2899SCharles.Forsyth 636*37da2899SCharles.Forsythisdigit(c: int): int 637*37da2899SCharles.Forsyth{ 638*37da2899SCharles.Forsyth return c >= '0' && c <= '9'; 639*37da2899SCharles.Forsyth} 640*37da2899SCharles.Forsyth 641*37da2899SCharles.Forsythislower(c: int): int 642*37da2899SCharles.Forsyth{ 643*37da2899SCharles.Forsyth return c >= 'a' && c <= 'z'; 644*37da2899SCharles.Forsyth} 645*37da2899SCharles.Forsyth 646*37da2899SCharles.Forsythisupper(c: int): int 647*37da2899SCharles.Forsyth{ 648*37da2899SCharles.Forsyth return c >= 'A' && c <= 'Z'; 649*37da2899SCharles.Forsyth} 650*37da2899SCharles.Forsyth 651*37da2899SCharles.Forsythisalpha(c: int): int 652*37da2899SCharles.Forsyth{ 653*37da2899SCharles.Forsyth return islower(c) || isupper(c); 654*37da2899SCharles.Forsyth} 655*37da2899SCharles.Forsyth 656*37da2899SCharles.Forsythhexdigit(c: int): int 657*37da2899SCharles.Forsyth{ 658*37da2899SCharles.Forsyth if(isdigit(c)) 659*37da2899SCharles.Forsyth return c-'0'; 660*37da2899SCharles.Forsyth if('a' <= c && c <= 'f') 661*37da2899SCharles.Forsyth return c-'a'+10; 662*37da2899SCharles.Forsyth if('A' <= c && c <= 'F') 663*37da2899SCharles.Forsyth return c-'A'+10; 664*37da2899SCharles.Forsyth return -1; 665*37da2899SCharles.Forsyth} 666*37da2899SCharles.Forsyth 667*37da2899SCharles.Forsythdigits(s: ref ReStr): int 668*37da2899SCharles.Forsyth{ 669*37da2899SCharles.Forsyth n := 0; 670*37da2899SCharles.Forsyth while(isdigit(s.peek())) 671*37da2899SCharles.Forsyth n = 10*n + s.next() -'0'; 672*37da2899SCharles.Forsyth return n; 673*37da2899SCharles.Forsyth} 674*37da2899SCharles.Forsyth 675*37da2899SCharles.Forsythhexdigits(s: ref ReStr, n: int): int 676*37da2899SCharles.Forsyth{ 677*37da2899SCharles.Forsyth x := 0; 678*37da2899SCharles.Forsyth for(i := 0; i < n; i++){ 679*37da2899SCharles.Forsyth v := hexdigit(s.next()); 680*37da2899SCharles.Forsyth if(v < 0) 681*37da2899SCharles.Forsyth return -1; 682*37da2899SCharles.Forsyth x = 16*x+v; 683*37da2899SCharles.Forsyth } 684*37da2899SCharles.Forsyth return x; 685*37da2899SCharles.Forsyth} 686*37da2899SCharles.Forsyth 687*37da2899SCharles.Forsythdrange(s: ref ReStr): (int, int) 688*37da2899SCharles.Forsyth{ 689*37da2899SCharles.Forsyth n1 := n2 := -1; 690*37da2899SCharles.Forsyth if(isdigit(s.peek())) 691*37da2899SCharles.Forsyth n1 = digits(s); 692*37da2899SCharles.Forsyth if(s.peek() == ','){ 693*37da2899SCharles.Forsyth s.next(); 694*37da2899SCharles.Forsyth if(isdigit(s.peek())) 695*37da2899SCharles.Forsyth n2 = digits(s); 696*37da2899SCharles.Forsyth else 697*37da2899SCharles.Forsyth n2 = HUGE; 698*37da2899SCharles.Forsyth } 699*37da2899SCharles.Forsyth else 700*37da2899SCharles.Forsyth n2 = n1; 701*37da2899SCharles.Forsyth if(n1 < 0 || n1 > n2) 702*37da2899SCharles.Forsyth syntax("bad number range"); 703*37da2899SCharles.Forsyth return (n1, n2); 704*37da2899SCharles.Forsyth} 705*37da2899SCharles.Forsyth 706*37da2899SCharles.Forsyth# walk the tree adjusting pointers to refer to 707*37da2899SCharles.Forsyth# next state of the finite state machine 708*37da2899SCharles.Forsyth 709*37da2899SCharles.Forsythwalk(r: refRex, succ: refRex, ar: ref Arena) 710*37da2899SCharles.Forsyth{ 711*37da2899SCharles.Forsyth if(r==NONE) 712*37da2899SCharles.Forsyth return; 713*37da2899SCharles.Forsyth rex := ar.rex[r]; 714*37da2899SCharles.Forsyth case rex.kind { 715*37da2899SCharles.Forsyth ALT => walk(rex.left, succ, ar); 716*37da2899SCharles.Forsyth walk(rex.right, succ, ar); 717*37da2899SCharles.Forsyth return; 718*37da2899SCharles.Forsyth CAT => walk(rex.left, rex.right, ar); 719*37da2899SCharles.Forsyth walk(rex.right, succ, ar); 720*37da2899SCharles.Forsyth ar.rex[r] = ar.rex[rex.left]; # optimization 721*37da2899SCharles.Forsyth return; 722*37da2899SCharles.Forsyth CLO or PCLO => 723*37da2899SCharles.Forsyth end := newRe(OPT, r, succ, nil, ar, 0, rex.greedy); # here's the circularity 724*37da2899SCharles.Forsyth walk(rex.left, end, ar); 725*37da2899SCharles.Forsyth OPT => walk(rex.left, succ, ar); 726*37da2899SCharles.Forsyth MNCLO => 727*37da2899SCharles.Forsyth ar.ptr++; 728*37da2899SCharles.Forsyth walk(rex.left, r, ar); 729*37da2899SCharles.Forsyth LCP => 730*37da2899SCharles.Forsyth ar.rex[r].left = newRe(IDLE, NONE, succ, nil, ar, 0, 0); 731*37da2899SCharles.Forsyth } 732*37da2899SCharles.Forsyth ar.rex[r].right = succ; 733*37da2899SCharles.Forsyth} 734*37da2899SCharles.Forsyth 735*37da2899SCharles.Forsythprtree(r: refRex, ar: ref Arena, done: list of int, ind: string): list of int 736*37da2899SCharles.Forsyth{ 737*37da2899SCharles.Forsyth sys->print("%s", ind); 738*37da2899SCharles.Forsyth if(r==NIL){ 739*37da2899SCharles.Forsyth sys->print("NIL\n"); 740*37da2899SCharles.Forsyth return done; 741*37da2899SCharles.Forsyth } 742*37da2899SCharles.Forsyth if(r==NONE){ 743*37da2899SCharles.Forsyth sys->print("NONE\n"); 744*37da2899SCharles.Forsyth return done; 745*37da2899SCharles.Forsyth } 746*37da2899SCharles.Forsyth printed := 0; 747*37da2899SCharles.Forsyth for(li := done; li != nil; li = tl li){ 748*37da2899SCharles.Forsyth if(hd li == r){ 749*37da2899SCharles.Forsyth printed = 1; 750*37da2899SCharles.Forsyth break; 751*37da2899SCharles.Forsyth } 752*37da2899SCharles.Forsyth } 753*37da2899SCharles.Forsyth rex := ar.rex[r]; 754*37da2899SCharles.Forsyth op := ""; 755*37da2899SCharles.Forsyth z := "Z"; 756*37da2899SCharles.Forsyth case rex.kind{ 757*37da2899SCharles.Forsyth ALT => op = "|"; 758*37da2899SCharles.Forsyth CAT => op = "and"; 759*37da2899SCharles.Forsyth DOT => op = "."; 760*37da2899SCharles.Forsyth SET => op = "[]"; 761*37da2899SCharles.Forsyth HAT => op = "^"; 762*37da2899SCharles.Forsyth DOL => op = "$"; 763*37da2899SCharles.Forsyth NUL => op = "NUL"; 764*37da2899SCharles.Forsyth PCLO => op = "+"; 765*37da2899SCharles.Forsyth CLO => op = "*"; 766*37da2899SCharles.Forsyth OPT => op = "?"; 767*37da2899SCharles.Forsyth LPN => op = "("; 768*37da2899SCharles.Forsyth RPN => op = ")"; 769*37da2899SCharles.Forsyth LPN0 => op = "?:"; 770*37da2899SCharles.Forsyth RPN0 => op = ":?"; 771*37da2899SCharles.Forsyth LPN1 => op = "?="; 772*37da2899SCharles.Forsyth RPN1 => op = "=?"; 773*37da2899SCharles.Forsyth LPN2 => op = "?!"; 774*37da2899SCharles.Forsyth RPN2 => op = "!?"; 775*37da2899SCharles.Forsyth BEET => op = "\\b"; 776*37da2899SCharles.Forsyth BEEF => op = "\\B"; 777*37da2899SCharles.Forsyth MNCLO => op = "{}"; 778*37da2899SCharles.Forsyth LCP => op = "n"; 779*37da2899SCharles.Forsyth IDLE => op = "i"; 780*37da2899SCharles.Forsyth * => z[0] = rex.kind; op = z; 781*37da2899SCharles.Forsyth } 782*37da2899SCharles.Forsyth if(printed){ 783*37da2899SCharles.Forsyth sys->print("node %d (%d)\n", r, r); 784*37da2899SCharles.Forsyth return done; 785*37da2899SCharles.Forsyth } 786*37da2899SCharles.Forsyth else{ 787*37da2899SCharles.Forsyth if(rex.ns != nil) 788*37da2899SCharles.Forsyth sys->print("%s [%d-%d] (%d)\n", op, rex.ns.m, rex.ns.n, r); 789*37da2899SCharles.Forsyth else 790*37da2899SCharles.Forsyth sys->print("%s (%d)\n", op, r); 791*37da2899SCharles.Forsyth done = r :: done; 792*37da2899SCharles.Forsyth ind += " "; 793*37da2899SCharles.Forsyth done = prtree(rex.left, ar, done, ind); 794*37da2899SCharles.Forsyth done = prtree(rex.right, ar, done, ind); 795*37da2899SCharles.Forsyth return done; 796*37da2899SCharles.Forsyth } 797*37da2899SCharles.Forsyth} 798*37da2899SCharles.Forsyth 799*37da2899SCharles.Forsythcompile(e: string, flag: int): (Re, string) 800*37da2899SCharles.Forsyth{ 801*37da2899SCharles.Forsyth if(e == nil) 802*37da2899SCharles.Forsyth return (nil, "missing expression"); 803*37da2899SCharles.Forsyth s := ref ReStr(e, 0, len e); 804*37da2899SCharles.Forsyth ar := ref Arena(array[2*s.n] of Rex, 0, 0, (flag&1)-1); 805*37da2899SCharles.Forsyth start := ar.start = re(s, ar); 806*37da2899SCharles.Forsyth if(start==NIL || s.n!=0) 807*37da2899SCharles.Forsyth syntax("invalid regular expression"); 808*37da2899SCharles.Forsyth walk(start, NIL, ar); 809*37da2899SCharles.Forsyth # prtree(start, ar, nil, ""); 810*37da2899SCharles.Forsyth if(ar.pno < 0) 811*37da2899SCharles.Forsyth ar.pno = 0; 812*37da2899SCharles.Forsyth return (ar, nil); 813*37da2899SCharles.Forsyth} 814*37da2899SCharles.Forsyth 815*37da2899SCharles.Forsyth# todo: queue for epsilon and advancing transitions 816*37da2899SCharles.Forsyth 817*37da2899SCharles.ForsythNum: adt{ 818*37da2899SCharles.Forsyth ns: ref Nstate; 819*37da2899SCharles.Forsyth m: int; 820*37da2899SCharles.Forsyth n: int; 821*37da2899SCharles.Forsyth}; 822*37da2899SCharles.ForsythGaz: adt { 823*37da2899SCharles.Forsyth pno: int; 824*37da2899SCharles.Forsyth beg: int; 825*37da2899SCharles.Forsyth end: int; 826*37da2899SCharles.Forsyth}; 827*37da2899SCharles.ForsythTrace: adt { 828*37da2899SCharles.Forsyth cre: refRex; # cursor in Re 829*37da2899SCharles.Forsyth trans: int; # 0 epsilon transition, 1 advancing transition 830*37da2899SCharles.Forsyth beg: int; # where this trace began; 831*37da2899SCharles.Forsyth end: int; # where this trace ended if success (-1 by default) 832*37da2899SCharles.Forsyth gaz: list of Gaz; 833*37da2899SCharles.Forsyth ns: list of ref Num; 834*37da2899SCharles.Forsyth}; 835*37da2899SCharles.ForsythQueue: adt { 836*37da2899SCharles.Forsyth ptr: int; 837*37da2899SCharles.Forsyth q: array of Trace; 838*37da2899SCharles.Forsyth}; 839*37da2899SCharles.Forsyth 840*37da2899SCharles.Forsythexecute(re: Re, s: string): array of (int, int) 841*37da2899SCharles.Forsyth{ 842*37da2899SCharles.Forsyth return executese(re, s, (-1,-1), 1, 1, 1, 0); 843*37da2899SCharles.Forsyth} 844*37da2899SCharles.Forsyth 845*37da2899SCharles.Forsythexecutese(re: Re, s: string, range: (int, int), bol: int, eol: int, multiline: int, ignorecase: int): array of (int,int) 846*37da2899SCharles.Forsyth{ 847*37da2899SCharles.Forsyth if(re==nil) 848*37da2899SCharles.Forsyth return nil; 849*37da2899SCharles.Forsyth (s0, s1) := range; 850*37da2899SCharles.Forsyth if(s0 < 0) 851*37da2899SCharles.Forsyth s0 = 0; 852*37da2899SCharles.Forsyth if(s1 < 0) 853*37da2899SCharles.Forsyth s1 = len s; 854*37da2899SCharles.Forsyth match := 0; 855*37da2899SCharles.Forsyth todo := ref Queue(0, array[2*re.ptr] of Trace); 856*37da2899SCharles.Forsyth for(i:=s0; i<=s1; i++) { 857*37da2899SCharles.Forsyth if(!match) # no leftmost match yet 858*37da2899SCharles.Forsyth todo.q[todo.ptr++] = Trace(re.start, 0, i, -1, nil, nil); 859*37da2899SCharles.Forsyth for(k:=0; k<todo.ptr; k++) { 860*37da2899SCharles.Forsyth q := todo.q[k]; 861*37da2899SCharles.Forsyth if(q.trans) 862*37da2899SCharles.Forsyth continue; 863*37da2899SCharles.Forsyth rex := re.rex[q.cre]; 864*37da2899SCharles.Forsyth next0 := next1 := next2 := NONE; 865*37da2899SCharles.Forsyth case rex.kind { 866*37da2899SCharles.Forsyth NUL => 867*37da2899SCharles.Forsyth next1 = rex.right; 868*37da2899SCharles.Forsyth DOT => 869*37da2899SCharles.Forsyth if(i<len s && !islt(s[i])) 870*37da2899SCharles.Forsyth next2 = rex.right; 871*37da2899SCharles.Forsyth HAT => 872*37da2899SCharles.Forsyth if(i == s0 && bol) 873*37da2899SCharles.Forsyth next1 = rex.right; 874*37da2899SCharles.Forsyth else if(multiline && i > 0 && islt(s[i-1])) 875*37da2899SCharles.Forsyth next1 = rex.right; 876*37da2899SCharles.Forsyth DOL => 877*37da2899SCharles.Forsyth if(i == s1 && eol) 878*37da2899SCharles.Forsyth next1 = rex.right; 879*37da2899SCharles.Forsyth else if(multiline && i < s1 && islt(s[i])) 880*37da2899SCharles.Forsyth next1 = rex.right; 881*37da2899SCharles.Forsyth SET => 882*37da2899SCharles.Forsyth if(i<len s && member(s[i], rex.set, ignorecase)) 883*37da2899SCharles.Forsyth next2 = rex.right; 884*37da2899SCharles.Forsyth CAT or 885*37da2899SCharles.Forsyth PCLO => 886*37da2899SCharles.Forsyth next1 = rex.left; 887*37da2899SCharles.Forsyth ALT or 888*37da2899SCharles.Forsyth CLO or 889*37da2899SCharles.Forsyth OPT => 890*37da2899SCharles.Forsyth if(rex.kind == ALT || rex.greedy){ 891*37da2899SCharles.Forsyth next0 = rex.left; 892*37da2899SCharles.Forsyth next1 = rex.right; 893*37da2899SCharles.Forsyth } 894*37da2899SCharles.Forsyth else{ 895*37da2899SCharles.Forsyth next0 = rex.right; 896*37da2899SCharles.Forsyth next1 = rex.left; 897*37da2899SCharles.Forsyth } 898*37da2899SCharles.Forsyth LPN => 899*37da2899SCharles.Forsyth next1 = rex.right; 900*37da2899SCharles.Forsyth q.gaz = Gaz(rex.pno,i,-1)::q.gaz; 901*37da2899SCharles.Forsyth RPN => 902*37da2899SCharles.Forsyth next1 = rex.right; 903*37da2899SCharles.Forsyth for(r:=q.gaz; ; r=tl r) { 904*37da2899SCharles.Forsyth (pno,beg1,end1) := hd r; 905*37da2899SCharles.Forsyth if(rex.pno==pno && end1==-1) { 906*37da2899SCharles.Forsyth q.gaz = Gaz(pno,beg1,i)::q.gaz; 907*37da2899SCharles.Forsyth break; 908*37da2899SCharles.Forsyth } 909*37da2899SCharles.Forsyth } 910*37da2899SCharles.Forsyth LPN0 or RPN0 or RPN1 or RPN2 => 911*37da2899SCharles.Forsyth next1 = rex.right; 912*37da2899SCharles.Forsyth LPN1 => 913*37da2899SCharles.Forsyth (rpn, nxt, nre) := storetree(q.cre, re); 914*37da2899SCharles.Forsyth m := executese(nre, s, (i, -1), bol, eol, multiline, ignorecase); 915*37da2899SCharles.Forsyth if(m != nil && m[0].t0 == i){ 916*37da2899SCharles.Forsyth next1 = nxt; 917*37da2899SCharles.Forsyth for(j := 1; j < len m; j++) 918*37da2899SCharles.Forsyth if(m[j].t0 >= 0) 919*37da2899SCharles.Forsyth q.gaz = Gaz(j, m[j].t0, m[j].t1)::q.gaz; 920*37da2899SCharles.Forsyth } 921*37da2899SCharles.Forsyth restoretree(LPN1, rpn, nxt, nre); 922*37da2899SCharles.Forsyth LPN2 => 923*37da2899SCharles.Forsyth (rpn, nxt, nre) := storetree(q.cre, re); 924*37da2899SCharles.Forsyth m := executese(nre, s, (i, -1), bol, eol, multiline, ignorecase); 925*37da2899SCharles.Forsyth if(m == nil || m[0].t0 != i) 926*37da2899SCharles.Forsyth next1 = nxt; 927*37da2899SCharles.Forsyth restoretree(LPN2, rpn, nxt, nre); 928*37da2899SCharles.Forsyth MNCLO => 929*37da2899SCharles.Forsyth num: ref Num; 930*37da2899SCharles.Forsyth 931*37da2899SCharles.Forsyth (q.ns, num) = nextn(q.cre, q.ns, rex.ns.m, rex.ns.n, re); 932*37da2899SCharles.Forsyth if(num.m > 0) 933*37da2899SCharles.Forsyth next1 = rex.left; 934*37da2899SCharles.Forsyth else if(num.n > 0){ 935*37da2899SCharles.Forsyth if(rex.greedy){ 936*37da2899SCharles.Forsyth next0 = rex.left; 937*37da2899SCharles.Forsyth next1 = rex.right; 938*37da2899SCharles.Forsyth } 939*37da2899SCharles.Forsyth else{ 940*37da2899SCharles.Forsyth next0 = rex.right; 941*37da2899SCharles.Forsyth next1 = rex.left; 942*37da2899SCharles.Forsyth } 943*37da2899SCharles.Forsyth } 944*37da2899SCharles.Forsyth else{ 945*37da2899SCharles.Forsyth next1 = rex.right; 946*37da2899SCharles.Forsyth (num.m, num.n) = (-1, -1); 947*37da2899SCharles.Forsyth } 948*37da2899SCharles.Forsyth LCP => 949*37da2899SCharles.Forsyth pno := rex.ns.m; 950*37da2899SCharles.Forsyth (beg1, end1) := lcpar(q.gaz, pno); 951*37da2899SCharles.Forsyth l := end1-beg1; 952*37da2899SCharles.Forsyth if(beg1 < 0) # undefined so succeeds 953*37da2899SCharles.Forsyth next1 = rex.right; 954*37da2899SCharles.Forsyth else if(i+l <= s1 && eqstr(s[beg1: end1], s[i: i+l], ignorecase)){ 955*37da2899SCharles.Forsyth (q.ns, nil) = nextn(rex.left, q.ns, l, l, re); 956*37da2899SCharles.Forsyth next1 = rex.left; # idle 957*37da2899SCharles.Forsyth } 958*37da2899SCharles.Forsyth IDLE => 959*37da2899SCharles.Forsyth num: ref Num; 960*37da2899SCharles.Forsyth 961*37da2899SCharles.Forsyth (q.ns, num) = nextn(q.cre, q.ns, -1, -1, re); 962*37da2899SCharles.Forsyth if(num.m >= 0) 963*37da2899SCharles.Forsyth next2 = q.cre; 964*37da2899SCharles.Forsyth else{ 965*37da2899SCharles.Forsyth next1 = rex.right; 966*37da2899SCharles.Forsyth (num.m, num.n) = (-1, -1); 967*37da2899SCharles.Forsyth } 968*37da2899SCharles.Forsyth BEET => 969*37da2899SCharles.Forsyth if(iswordc(s, i-1) != iswordc(s, i)) 970*37da2899SCharles.Forsyth next1 = rex.right; 971*37da2899SCharles.Forsyth BEEF => 972*37da2899SCharles.Forsyth if(iswordc(s, i-1) == iswordc(s, i)) 973*37da2899SCharles.Forsyth next1 = rex.right; 974*37da2899SCharles.Forsyth * => 975*37da2899SCharles.Forsyth if(i<len s && (rex.kind==s[i] || (ignorecase && eqcase(rex.kind, s[i])))) 976*37da2899SCharles.Forsyth next2 = rex.right; 977*37da2899SCharles.Forsyth } 978*37da2899SCharles.Forsyth l := k; 979*37da2899SCharles.Forsyth if(next0 != NONE) { 980*37da2899SCharles.Forsyth if(next0 != NIL) 981*37da2899SCharles.Forsyth (k, l) = insert(next0, 0, q.beg, -1, q.gaz, q.ns, todo, k, l); 982*37da2899SCharles.Forsyth else{ 983*37da2899SCharles.Forsyth match = 1; 984*37da2899SCharles.Forsyth (k, l) = insert(NIL, 2, q.beg, i, q.gaz, nil, todo, k, l); 985*37da2899SCharles.Forsyth } 986*37da2899SCharles.Forsyth } 987*37da2899SCharles.Forsyth if(next1 != NONE) { 988*37da2899SCharles.Forsyth if(next1 != NIL) 989*37da2899SCharles.Forsyth (k, l) = insert(next1, 0, q.beg, -1, q.gaz, q.ns, todo, k, l); 990*37da2899SCharles.Forsyth else{ 991*37da2899SCharles.Forsyth match = 1; 992*37da2899SCharles.Forsyth (k, l) = insert(NIL, 2, q.beg, i, q.gaz, nil, todo, k, l); 993*37da2899SCharles.Forsyth } 994*37da2899SCharles.Forsyth } 995*37da2899SCharles.Forsyth if(next2 != NONE) { 996*37da2899SCharles.Forsyth if(next2 != NIL) 997*37da2899SCharles.Forsyth (k, l) = insert(next2, 1, q.beg, -1, q.gaz, q.ns, todo, k, l); 998*37da2899SCharles.Forsyth else{ 999*37da2899SCharles.Forsyth match = 1; 1000*37da2899SCharles.Forsyth (k, l) = insert(NIL, 2, q.beg, i+1, q.gaz, nil, todo, k, l); 1001*37da2899SCharles.Forsyth } 1002*37da2899SCharles.Forsyth } 1003*37da2899SCharles.Forsyth } 1004*37da2899SCharles.Forsyth if(!atoe(todo) && match) 1005*37da2899SCharles.Forsyth break; 1006*37da2899SCharles.Forsyth } 1007*37da2899SCharles.Forsyth if(todo.ptr == 0) 1008*37da2899SCharles.Forsyth return nil; 1009*37da2899SCharles.Forsyth if(todo.ptr > 1) 1010*37da2899SCharles.Forsyth rfatal(sys->sprint("todo.ptr = %d", todo.ptr)); 1011*37da2899SCharles.Forsyth if(todo.q[0].trans != 2) 1012*37da2899SCharles.Forsyth rfatal(sys->sprint("trans = %d", todo.q[0].trans)); 1013*37da2899SCharles.Forsyth if(todo.q[0].cre != NIL) 1014*37da2899SCharles.Forsyth rfatal(sys->sprint("cre = %d", todo.q[0].cre)); 1015*37da2899SCharles.Forsyth beg := todo.q[0].beg; 1016*37da2899SCharles.Forsyth end := todo.q[0].end; 1017*37da2899SCharles.Forsyth gaz := todo.q[0].gaz; 1018*37da2899SCharles.Forsyth if(beg == -1) 1019*37da2899SCharles.Forsyth return nil; 1020*37da2899SCharles.Forsyth result := array[re.pno+1] of { 0 => (beg,end), * => (-1,-1) }; 1021*37da2899SCharles.Forsyth for( ; gaz!=nil; gaz=tl gaz) { 1022*37da2899SCharles.Forsyth (pno, beg1, end1) := hd gaz; 1023*37da2899SCharles.Forsyth (rbeg, nil) := result[pno]; 1024*37da2899SCharles.Forsyth if(rbeg==-1 && (beg1|end1)!=-1) 1025*37da2899SCharles.Forsyth result[pno] = (beg1,end1); 1026*37da2899SCharles.Forsyth } 1027*37da2899SCharles.Forsyth return result; 1028*37da2899SCharles.Forsyth} 1029*37da2899SCharles.Forsyth 1030*37da2899SCharles.Forsythbetter(newbeg, newend, oldbeg, oldend: int): int 1031*37da2899SCharles.Forsyth{ 1032*37da2899SCharles.Forsyth return oldbeg==-1 || newbeg<oldbeg || 1033*37da2899SCharles.Forsyth newbeg==oldbeg && newend>oldend; 1034*37da2899SCharles.Forsyth} 1035*37da2899SCharles.Forsyth 1036*37da2899SCharles.Forsythinsert(next: refRex, trans: int, tbeg: int, tend: int, tgaz: list of Gaz, tns: list of ref Num, todo: ref Queue, k: int, l: int): (int, int) 1037*37da2899SCharles.Forsyth{ 1038*37da2899SCharles.Forsyth# sys->print("insert %d eps=%d beg=%d end=%d (k, l) = (%d %d) => ", next, trans, tbeg, tend, k, l); 1039*37da2899SCharles.Forsyth for(j:=0; j<todo.ptr; j++){ 1040*37da2899SCharles.Forsyth if(todo.q[j].trans == trans){ 1041*37da2899SCharles.Forsyth if(todo.q[j].cre == next){ 1042*37da2899SCharles.Forsyth if(better(todo.q[j].beg, todo.q[j].end, tbeg, tend)) 1043*37da2899SCharles.Forsyth return (k, l); 1044*37da2899SCharles.Forsyth else if(better(tbeg, tend, todo.q[j].beg, todo.q[j].end)) 1045*37da2899SCharles.Forsyth break; 1046*37da2899SCharles.Forsyth else if(j < k) 1047*37da2899SCharles.Forsyth return (k, l); 1048*37da2899SCharles.Forsyth else 1049*37da2899SCharles.Forsyth break; 1050*37da2899SCharles.Forsyth } 1051*37da2899SCharles.Forsyth } 1052*37da2899SCharles.Forsyth } 1053*37da2899SCharles.Forsyth if(j < k){ 1054*37da2899SCharles.Forsyth k--; 1055*37da2899SCharles.Forsyth l--; 1056*37da2899SCharles.Forsyth } 1057*37da2899SCharles.Forsyth if(j < todo.ptr){ 1058*37da2899SCharles.Forsyth todo.q[j: ] = todo.q[j+1: todo.ptr]; 1059*37da2899SCharles.Forsyth todo.ptr--; 1060*37da2899SCharles.Forsyth } 1061*37da2899SCharles.Forsyth todo.q[l+2: ] = todo.q[l+1: todo.ptr]; 1062*37da2899SCharles.Forsyth todo.ptr++; 1063*37da2899SCharles.Forsyth todo.q[l+1] = Trace(next, trans, tbeg, tend, tgaz, tns); 1064*37da2899SCharles.Forsyth# for(j=0; j < todo.ptr; j++) sys->print("%d(%d) ", todo.q[j].cre, todo.q[j].trans); sys->print("\n"); 1065*37da2899SCharles.Forsyth return (k, l+1); 1066*37da2899SCharles.Forsyth} 1067*37da2899SCharles.Forsyth 1068*37da2899SCharles.Forsyth# remove epsilon transitions and move advancing transitions to epsilon ones 1069*37da2899SCharles.Forsythatoe(todo: ref Queue): int 1070*37da2899SCharles.Forsyth{ 1071*37da2899SCharles.Forsyth n := 0; 1072*37da2899SCharles.Forsyth for(j := 0; j < todo.ptr; j++){ 1073*37da2899SCharles.Forsyth if(todo.q[j].trans){ 1074*37da2899SCharles.Forsyth if(todo.q[j].trans == 1){ 1075*37da2899SCharles.Forsyth todo.q[j].trans = 0; 1076*37da2899SCharles.Forsyth n++; 1077*37da2899SCharles.Forsyth } 1078*37da2899SCharles.Forsyth } 1079*37da2899SCharles.Forsyth else{ 1080*37da2899SCharles.Forsyth todo.q[j: ] = todo.q[j+1: todo.ptr]; 1081*37da2899SCharles.Forsyth todo.ptr--; 1082*37da2899SCharles.Forsyth j--; 1083*37da2899SCharles.Forsyth } 1084*37da2899SCharles.Forsyth } 1085*37da2899SCharles.Forsyth return n; 1086*37da2899SCharles.Forsyth} 1087*37da2899SCharles.Forsyth 1088*37da2899SCharles.Forsythnextn(re: int, ln: list of ref Num, m: int, n: int, ar: ref Arena): (list of ref Num, ref Num) 1089*37da2899SCharles.Forsyth{ 1090*37da2899SCharles.Forsyth num: ref Num; 1091*37da2899SCharles.Forsyth 1092*37da2899SCharles.Forsyth ns := ar.rex[re].ns; 1093*37da2899SCharles.Forsyth for(l := ln; l != nil; l = tl l){ 1094*37da2899SCharles.Forsyth if((hd l).ns == ns){ 1095*37da2899SCharles.Forsyth num = hd l; 1096*37da2899SCharles.Forsyth break; 1097*37da2899SCharles.Forsyth } 1098*37da2899SCharles.Forsyth } 1099*37da2899SCharles.Forsyth if(num == nil) 1100*37da2899SCharles.Forsyth ln = (num = ref Num(ns, -1, -1)) :: ln; 1101*37da2899SCharles.Forsyth if(num.m == -1 && num.n == -1) 1102*37da2899SCharles.Forsyth (num.m, num.n) = (m, n); 1103*37da2899SCharles.Forsyth else 1104*37da2899SCharles.Forsyth (nil, nil) = (--num.m, --num.n); 1105*37da2899SCharles.Forsyth return (ln, num); 1106*37da2899SCharles.Forsyth} 1107*37da2899SCharles.Forsyth 1108*37da2899SCharles.ForsythASCII : con 128; 1109*37da2899SCharles.ForsythWORD : con 32; 1110*37da2899SCharles.Forsyth 1111*37da2899SCharles.Forsythmem(c: int, set: ref Set): int 1112*37da2899SCharles.Forsyth{ 1113*37da2899SCharles.Forsyth return (set.ascii[c/WORD]>>c%WORD)&1; 1114*37da2899SCharles.Forsyth} 1115*37da2899SCharles.Forsyth 1116*37da2899SCharles.Forsythmember(char: int, set: ref Set, ignorecase: int): int 1117*37da2899SCharles.Forsyth{ 1118*37da2899SCharles.Forsyth if(set.subset != nil){ 1119*37da2899SCharles.Forsyth for(l := set.subset; l != nil; l = tl l) 1120*37da2899SCharles.Forsyth if(member(char, hd l, ignorecase)) 1121*37da2899SCharles.Forsyth return !set.neg; 1122*37da2899SCharles.Forsyth } 1123*37da2899SCharles.Forsyth if(char < 128){ 1124*37da2899SCharles.Forsyth if(ignorecase) 1125*37da2899SCharles.Forsyth return (mem(tolower(char), set) || mem(toupper(char), set))^set.neg; 1126*37da2899SCharles.Forsyth else 1127*37da2899SCharles.Forsyth return ((set.ascii[char/WORD]>>char%WORD)&1)^set.neg; 1128*37da2899SCharles.Forsyth } 1129*37da2899SCharles.Forsyth for(l:=set.unicode; l!=nil; l=tl l) { 1130*37da2899SCharles.Forsyth (beg, end) := hd l; 1131*37da2899SCharles.Forsyth if(char>=beg && char<=end) 1132*37da2899SCharles.Forsyth return !set.neg; 1133*37da2899SCharles.Forsyth } 1134*37da2899SCharles.Forsyth return set.neg; 1135*37da2899SCharles.Forsyth} 1136*37da2899SCharles.Forsyth 1137*37da2899SCharles.ForsythnewSet(s: ref ReStr): ref Set 1138*37da2899SCharles.Forsyth{ 1139*37da2899SCharles.Forsyth op: int; 1140*37da2899SCharles.Forsyth set0: ref Set; 1141*37da2899SCharles.Forsyth 1142*37da2899SCharles.Forsyth set := ref Set(0, array[ASCII/WORD] of {* => 0}, nil, nil); 1143*37da2899SCharles.Forsyth if(s.peek() == '^') { 1144*37da2899SCharles.Forsyth set.neg = 1; 1145*37da2899SCharles.Forsyth s.next(); 1146*37da2899SCharles.Forsyth } 1147*37da2899SCharles.Forsyth while(s.n > 0) { 1148*37da2899SCharles.Forsyth char1 := s.next(); 1149*37da2899SCharles.Forsyth if(char1 == ']') 1150*37da2899SCharles.Forsyth return set; 1151*37da2899SCharles.Forsyth (char1, set0, op) = esc(s, char1, 1); 1152*37da2899SCharles.Forsyth if(set0 != nil) 1153*37da2899SCharles.Forsyth mergeset(set, set0); 1154*37da2899SCharles.Forsyth char2 := char1; 1155*37da2899SCharles.Forsyth if(s.peek() == '-') { 1156*37da2899SCharles.Forsyth if(set0 != nil) 1157*37da2899SCharles.Forsyth syntax("set in range"); 1158*37da2899SCharles.Forsyth s.next(); 1159*37da2899SCharles.Forsyth char2 = s.next(); 1160*37da2899SCharles.Forsyth if(char2 == ']') 1161*37da2899SCharles.Forsyth break; 1162*37da2899SCharles.Forsyth (char2, set0, op) = esc(s, char2, 1); 1163*37da2899SCharles.Forsyth if(set0 != nil) 1164*37da2899SCharles.Forsyth syntax("set in range"); 1165*37da2899SCharles.Forsyth if(char2 < char1) 1166*37da2899SCharles.Forsyth break; 1167*37da2899SCharles.Forsyth } 1168*37da2899SCharles.Forsyth addset(set, char1, char2); 1169*37da2899SCharles.Forsyth } 1170*37da2899SCharles.Forsyth syntax("bad set"); 1171*37da2899SCharles.Forsyth return nil; 1172*37da2899SCharles.Forsyth} 1173*37da2899SCharles.Forsyth 1174*37da2899SCharles.Forsythaddset(set: ref Set, c1: int, c2: int) 1175*37da2899SCharles.Forsyth{ 1176*37da2899SCharles.Forsyth for(c := c1; c <= c2; c++){ 1177*37da2899SCharles.Forsyth if(c < ASCII) 1178*37da2899SCharles.Forsyth set.ascii[c/WORD] |= 1<<c%WORD; 1179*37da2899SCharles.Forsyth else{ 1180*37da2899SCharles.Forsyth set.unicode = (c, c2) :: set.unicode; 1181*37da2899SCharles.Forsyth break; 1182*37da2899SCharles.Forsyth } 1183*37da2899SCharles.Forsyth } 1184*37da2899SCharles.Forsyth} 1185*37da2899SCharles.Forsyth 1186*37da2899SCharles.Forsythaddsets(set: ref Set, s: string) 1187*37da2899SCharles.Forsyth{ 1188*37da2899SCharles.Forsyth for(i := 0; i < len s; i++) 1189*37da2899SCharles.Forsyth addset(set, s[i], s[i]); 1190*37da2899SCharles.Forsyth} 1191*37da2899SCharles.Forsyth 1192*37da2899SCharles.Forsythmergeset(set: ref Set, set0: ref Set) 1193*37da2899SCharles.Forsyth{ 1194*37da2899SCharles.Forsyth if(!set0.neg){ 1195*37da2899SCharles.Forsyth for(i := 0; i < ASCII/WORD; i++) 1196*37da2899SCharles.Forsyth set.ascii[i] |= set0.ascii[i]; 1197*37da2899SCharles.Forsyth for(l := set0.unicode; l != nil; l = tl l) 1198*37da2899SCharles.Forsyth set.unicode = hd l :: set.unicode; 1199*37da2899SCharles.Forsyth } 1200*37da2899SCharles.Forsyth else 1201*37da2899SCharles.Forsyth set.subset = set0 :: set.subset; 1202*37da2899SCharles.Forsyth} 1203*37da2899SCharles.Forsyth 1204*37da2899SCharles.Forsythnewset(c1: int, c2: int): ref Set 1205*37da2899SCharles.Forsyth{ 1206*37da2899SCharles.Forsyth set := ref Set(0, array[ASCII/WORD] of {* => 0}, nil, nil); 1207*37da2899SCharles.Forsyth addset(set, c1, c2); 1208*37da2899SCharles.Forsyth return set; 1209*37da2899SCharles.Forsyth} 1210*37da2899SCharles.Forsyth 1211*37da2899SCharles.Forsythstoretree(lpn: int, re: ref Arena): (int, int, ref Arena) 1212*37da2899SCharles.Forsyth{ 1213*37da2899SCharles.Forsyth rpn: int; 1214*37da2899SCharles.Forsyth 1215*37da2899SCharles.Forsyth rex := re.rex[lpn]; 1216*37da2899SCharles.Forsyth k := rex.kind; 1217*37da2899SCharles.Forsyth l := 1; 1218*37da2899SCharles.Forsyth for(;;){ 1219*37da2899SCharles.Forsyth rpn = rex.right; 1220*37da2899SCharles.Forsyth rex = re.rex[rpn]; 1221*37da2899SCharles.Forsyth if(rex.kind == k) 1222*37da2899SCharles.Forsyth l++; 1223*37da2899SCharles.Forsyth else if(rex.kind == k+1 && --l == 0) 1224*37da2899SCharles.Forsyth break; 1225*37da2899SCharles.Forsyth } 1226*37da2899SCharles.Forsyth re.rex[lpn].kind = LPN; 1227*37da2899SCharles.Forsyth re.rex[rpn].kind = RPN; 1228*37da2899SCharles.Forsyth nxt := re.rex[rpn].right; 1229*37da2899SCharles.Forsyth re.rex[rpn].right = NIL; 1230*37da2899SCharles.Forsyth nre := ref *re; 1231*37da2899SCharles.Forsyth nre.start = lpn; 1232*37da2899SCharles.Forsyth return (rpn, nxt, nre); 1233*37da2899SCharles.Forsyth} 1234*37da2899SCharles.Forsyth 1235*37da2899SCharles.Forsythrestoretree(lop: int, rpn: int, nxt: int, re: ref Arena) 1236*37da2899SCharles.Forsyth{ 1237*37da2899SCharles.Forsyth lpn := re.start; 1238*37da2899SCharles.Forsyth re.rex[lpn].kind = lop; 1239*37da2899SCharles.Forsyth re.rex[rpn].kind = lop+1; 1240*37da2899SCharles.Forsyth re.rex[rpn].right = nxt; 1241*37da2899SCharles.Forsyth} 1242*37da2899SCharles.Forsyth 1243*37da2899SCharles.Forsythiswordc(s: string, i: int): int 1244*37da2899SCharles.Forsyth{ 1245*37da2899SCharles.Forsyth if(i < 0 || i >= len s) 1246*37da2899SCharles.Forsyth return 0; 1247*37da2899SCharles.Forsyth c := s[i]; 1248*37da2899SCharles.Forsyth return isdigit(c) || isalpha(c) || c == '_'; 1249*37da2899SCharles.Forsyth} 1250*37da2899SCharles.Forsyth 1251*37da2899SCharles.Forsythlcpar(gaz: list of Gaz, pno: int): (int, int) 1252*37da2899SCharles.Forsyth{ 1253*37da2899SCharles.Forsyth for(r := gaz; r != nil; r = tl r) { 1254*37da2899SCharles.Forsyth (pno1, beg1, end1) := hd r; 1255*37da2899SCharles.Forsyth if(pno == pno1) 1256*37da2899SCharles.Forsyth return (beg1, end1); 1257*37da2899SCharles.Forsyth } 1258*37da2899SCharles.Forsyth return (-1, -1); 1259*37da2899SCharles.Forsyth} 1260*37da2899SCharles.Forsyth 1261*37da2899SCharles.Forsytheqstr(s: string, t: string, ic: int): int 1262*37da2899SCharles.Forsyth{ 1263*37da2899SCharles.Forsyth if(!ic) 1264*37da2899SCharles.Forsyth return s == t; 1265*37da2899SCharles.Forsyth if(len s != len t) 1266*37da2899SCharles.Forsyth return 0; 1267*37da2899SCharles.Forsyth for(i := 0; i < len s; i++) 1268*37da2899SCharles.Forsyth if(!eqcase(s[i], t[i])) 1269*37da2899SCharles.Forsyth return 0; 1270*37da2899SCharles.Forsyth return 1; 1271*37da2899SCharles.Forsyth} 1272*37da2899SCharles.Forsyth 1273*37da2899SCharles.Forsytheqcase(c1: int, c2: int): int 1274*37da2899SCharles.Forsyth{ 1275*37da2899SCharles.Forsyth return toupper(c1) == toupper(c2); 1276*37da2899SCharles.Forsyth} 1277*37da2899SCharles.Forsyth 1278*37da2899SCharles.Forsythsyntax(s: string) 1279*37da2899SCharles.Forsyth{ 1280*37da2899SCharles.Forsyth runtime(regex, SyntaxError, s); 1281*37da2899SCharles.Forsyth} 1282*37da2899SCharles.Forsyth 1283*37da2899SCharles.Forsythrfatal(s: string) 1284*37da2899SCharles.Forsyth{ 1285*37da2899SCharles.Forsyth runtime(regex, InternalError, s); 1286*37da2899SCharles.Forsyth} 1287