1 #include <u.h> 2 #include <libc.h> 3 #include <draw.h> 4 #include <html.h> 5 #include "impl.h" 6 7 Rune* whitespace = L" \t\n\r"; 8 Rune* notwhitespace = L"^ \t\n\r"; 9 10 // All lists start out like List structure. 11 // List itself can be used as list of int. 12 int 13 _listlen(List* l) 14 { 15 int n = 0; 16 17 while(l != nil) { 18 l = l->next; 19 n++; 20 } 21 return n; 22 } 23 24 // Cons 25 List* 26 _newlist(int val, List* rest) 27 { 28 List* ans; 29 30 ans = (List*)emalloc(sizeof(List)); 31 ans->val = val; 32 ans->next = rest; 33 return ans; 34 } 35 36 // Reverse a list in place 37 List* 38 _revlist(List* l) 39 { 40 List* newl; 41 List* nextl; 42 43 newl = nil; 44 while(l != nil) { 45 nextl = l->next; 46 l->next = newl; 47 newl = l; 48 l = nextl; 49 } 50 return newl; 51 } 52 53 // The next few routines take a "character class" as argument. 54 // e.g., "a-zA-Z", or "^ \t\n" 55 // (ranges indicated by - except in first position; 56 // ^ is first position means "not in" the following class) 57 58 // Splitl splits s[0:n] just before first character of class cl. 59 // Answers go in (p1, n1) and (p2, n2). 60 // If no split, the whole thing goes in the first component. 61 // Note: answers contain pointers into original string. 62 void 63 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 64 { 65 Rune* p; 66 67 p = _Strnclass(s, cl, n); 68 *p1 = s; 69 if(p == nil) { 70 *n1 = n; 71 *p2 = nil; 72 *n2 = 0; 73 } 74 else { 75 *p2 = p; 76 *n1 = p-s; 77 *n2 = n-*n1; 78 } 79 } 80 81 // Splitr splits s[0:n] just after last character of class cl. 82 // Answers go in (p1, n1) and (p2, n2). 83 // If no split, the whole thing goes in the last component. 84 // Note: answers contain pointers into original string. 85 void 86 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 87 { 88 Rune* p; 89 90 p = _Strnrclass(s, cl, n); 91 if(p == nil) { 92 *p1 = nil; 93 *n1 = 0; 94 *p2 = s; 95 *n2 = n; 96 } 97 else { 98 *p1 = s; 99 *p2 = p+1; 100 *n1 = *p2-s; 101 *n2 = n-*n1; 102 } 103 } 104 105 // Splitall splits s[0:n] into parts that are separated by characters from class cl. 106 // Each part will have nonzero length. 107 // At most alen parts are found, and pointers to their starts go into 108 // the strarr array, while their lengths go into the lenarr array. 109 // The return value is the number of parts found. 110 int 111 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) 112 { 113 int i; 114 Rune* p; 115 Rune* q; 116 Rune* slast; 117 118 if(s == nil || n == 0) 119 return 0; 120 i = 0; 121 p = s; 122 slast = s+n; 123 while(p < slast && i < alen) { 124 while(p < slast && _inclass(*p, cl)) 125 p++; 126 if(p == slast) 127 break; 128 q = _Strnclass(p, cl, slast-p); 129 if(q == nil) 130 q = slast; 131 assert(q > p && q <= slast); 132 strarr[i] = p; 133 lenarr[i] = q-p; 134 i++; 135 p = q; 136 } 137 return i; 138 } 139 140 // Find part of s that excludes leading and trailing whitespace, 141 // and return that part in *pans (and its length in *panslen). 142 void 143 _trimwhite(Rune* s, int n, Rune** pans, int* panslen) 144 { 145 Rune* p; 146 Rune* q; 147 148 p = nil; 149 if(n > 0) { 150 p = _Strnclass(s, notwhitespace, n); 151 if(p != nil) { 152 q = _Strnrclass(s, notwhitespace, n); 153 assert(q != nil); 154 n = q+1-p; 155 } 156 } 157 *pans = p; 158 *panslen = n; 159 } 160 161 // _Strclass returns a pointer to the first element of s that is 162 // a member of class cl, nil if none. 163 Rune* 164 _Strclass(Rune* s, Rune* cl) 165 { 166 Rune* p; 167 168 for(p = s; *p != 0; p++) 169 if(_inclass(*p, cl)) 170 return p; 171 return nil; 172 } 173 174 // _Strnclass returns a pointer to the first element of s[0:n] that is 175 // a member of class cl, nil if none. 176 Rune* 177 _Strnclass(Rune* s, Rune* cl, int n) 178 { 179 Rune* p; 180 181 for(p = s; n-- && *p != 0; p++) 182 if(_inclass(*p, cl)) 183 return p; 184 return nil; 185 } 186 187 // _Strrclass returns a pointer to the last element of s that is 188 // a member of class cl, nil if none 189 Rune* 190 _Strrclass(Rune* s, Rune* cl) 191 { 192 Rune* p; 193 194 if(s == nil || *s == 0) 195 return nil; 196 p = s + runestrlen(s) - 1; 197 while(p >= s) { 198 if(_inclass(*p, cl)) 199 return p; 200 p--; 201 }; 202 return nil; 203 } 204 205 // _Strnrclass returns a pointer to the last element of s[0:n] that is 206 // a member of class cl, nil if none 207 Rune* 208 _Strnrclass(Rune* s, Rune* cl, int n) 209 { 210 Rune* p; 211 212 if(s == nil || *s == 0 || n == 0) 213 return nil; 214 p = s + n - 1; 215 while(p >= s) { 216 if(_inclass(*p, cl)) 217 return p; 218 p--; 219 }; 220 return nil; 221 } 222 223 // Is c in the class cl? 224 int 225 _inclass(Rune c, Rune* cl) 226 { 227 int n; 228 int ans; 229 int negate; 230 int i; 231 232 n = runestrlen(cl); 233 if(n == 0) 234 return 0; 235 ans = 0; 236 negate = 0; 237 if(cl[0] == '^') { 238 negate = 1; 239 cl++; 240 n--; 241 } 242 for(i = 0; i < n; i++) { 243 if(cl[i] == '-' && i > 0 && i < n - 1) { 244 if(c >= cl[i - 1] && c <= cl[i + 1]) { 245 ans = 1; 246 break; 247 } 248 i++; 249 } 250 else if(c == cl[i]) { 251 ans = 1; 252 break; 253 } 254 } 255 if(negate) 256 ans = !ans; 257 return ans; 258 } 259 260 // Is pre a prefix of s? 261 int 262 _prefix(Rune* pre, Rune* s) 263 { 264 int ns; 265 int n; 266 int k; 267 268 ns = runestrlen(s); 269 n = runestrlen(pre); 270 if(ns < n) 271 return 0; 272 for(k = 0; k < n; k++) { 273 if(pre[k] != s[k]) 274 return 0; 275 } 276 return 1; 277 } 278 279 // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). 280 // Also, do a case-insensitive match, assuming s2 281 // has no chars in [A-Z], only their lowercase versions. 282 // (This routine is used for in-place keyword lookup, where s2 is in a keyword 283 // list and s1 is some substring, possibly mixed-case, in a buffer.) 284 int 285 _Strncmpci(Rune *s1, int n1, Rune *s2) 286 { 287 Rune c1, c2; 288 289 for(;;) { 290 if(n1-- == 0) { 291 if(*s2 == 0) 292 return 0; 293 return -1; 294 } 295 c1 = *s1++; 296 c2 = *s2++; 297 if(c1 >= 'A' && c1 <= 'Z') 298 c1 = c1 - 'A' + 'a'; 299 if(c1 != c2) { 300 if(c1 > c2) 301 return 1; 302 return -1; 303 } 304 } 305 } 306 307 // emalloc and copy 308 Rune* 309 _Strdup(Rune* s) 310 { 311 if(s == nil) 312 return nil; 313 return _Strndup(s, runestrlen(s)); 314 } 315 316 // emalloc and copy n chars of s (assume s is at least that long), 317 // and add 0 terminator. 318 // Return nil if n==0. 319 Rune* 320 _Strndup(Rune* s, int n) 321 { 322 Rune* ans; 323 324 if(n <= 0) 325 return nil; 326 ans = _newstr(n); 327 memmove(ans, s, n*sizeof(Rune)); 328 ans[n] = 0; 329 return ans; 330 } 331 // emalloc enough room for n Runes, plus 1 null terminator. 332 // (Not initialized to anything.) 333 Rune* 334 _newstr(int n) 335 { 336 return (Rune*)emalloc((n+1)*sizeof(Rune)); 337 } 338 339 // emalloc and copy s+t 340 Rune* 341 _Strdup2(Rune* s, Rune* t) 342 { 343 int ns, nt; 344 Rune* ans; 345 Rune* p; 346 347 ns = runestrlen(s); 348 nt = runestrlen(t); 349 if(ns+nt == 0) 350 return nil; 351 ans = _newstr(ns+nt); 352 p = _Stradd(ans, s, ns); 353 p = _Stradd(p, t, nt); 354 *p = 0; 355 return ans; 356 } 357 358 // Return emalloc'd substring s[start:stop], 359 Rune* 360 _Strsubstr(Rune* s, int start, int stop) 361 { 362 Rune* t; 363 364 if(start == stop) 365 return nil; 366 t = _Strndup(s+start, stop-start); 367 return t; 368 } 369 370 // Copy n chars to s1 from s2, and return s1+n 371 Rune* 372 _Stradd(Rune* s1, Rune* s2, int n) 373 { 374 if(n == 0) 375 return s1; 376 memmove(s1, s2, n*sizeof(Rune)); 377 return s1+n; 378 } 379 380 // Like strtol, but converting from Rune* string 381 382 #define LONG_MAX 2147483647L 383 #define LONG_MIN -2147483648L 384 385 long 386 _Strtol(Rune* nptr, Rune** endptr, int base) 387 { 388 Rune* p; 389 long n, nn; 390 int c, ovfl, v, neg, ndig; 391 392 p = nptr; 393 neg = 0; 394 n = 0; 395 ndig = 0; 396 ovfl = 0; 397 398 /* 399 * White space 400 */ 401 for(;;p++){ 402 switch(*p){ 403 case ' ': 404 case '\t': 405 case '\n': 406 case '\f': 407 case '\r': 408 case '\v': 409 continue; 410 } 411 break; 412 } 413 414 /* 415 * Sign 416 */ 417 if(*p=='-' || *p=='+') 418 if(*p++ == '-') 419 neg = 1; 420 421 /* 422 * Base 423 */ 424 if(base==0){ 425 if(*p != '0') 426 base = 10; 427 else{ 428 base = 8; 429 if(p[1]=='x' || p[1]=='X'){ 430 p += 2; 431 base = 16; 432 } 433 } 434 }else if(base==16 && *p=='0'){ 435 if(p[1]=='x' || p[1]=='X') 436 p += 2; 437 }else if(base<0 || 36<base) 438 goto Return; 439 440 /* 441 * Non-empty sequence of digits 442 */ 443 for(;; p++,ndig++){ 444 c = *p; 445 v = base; 446 if('0'<=c && c<='9') 447 v = c - '0'; 448 else if('a'<=c && c<='z') 449 v = c - 'a' + 10; 450 else if('A'<=c && c<='Z') 451 v = c - 'A' + 10; 452 if(v >= base) 453 break; 454 nn = n*base + v; 455 if(nn < n) 456 ovfl = 1; 457 n = nn; 458 } 459 460 Return: 461 if(ndig == 0) 462 p = nptr; 463 if(endptr) 464 *endptr = p; 465 if(ovfl){ 466 if(neg) 467 return LONG_MIN; 468 return LONG_MAX; 469 } 470 if(neg) 471 return -n; 472 return n; 473 } 474 475 // Convert buf[0:n], bytes whose character set is chset, 476 // into a emalloc'd null-terminated Unicode string. 477 Rune* 478 toStr(uchar* buf, int n, int chset) 479 { 480 int i; 481 int m; 482 Rune ch; 483 Rune* ans; 484 485 switch(chset) { 486 case US_Ascii: 487 case ISO_8859_1: 488 ans = (Rune*)emalloc((n+1)*sizeof(Rune)); 489 for(i = 0; i < n; i++) 490 ans[i] = buf[i]; 491 ans[n] = 0; 492 break; 493 494 case UTF_8: 495 m = 0; 496 for(i = 0; i < n; ) { 497 i += chartorune(&ch, (char*)(buf+i)); 498 m++; 499 } 500 ans = (Rune*)emalloc((m+1)*sizeof(Rune)); 501 m = 0; 502 for(i = 0; i < n; ) { 503 i += chartorune(&ch, (char*)(buf+i)); 504 ans[m++] = ch; 505 } 506 ans[m] = 0; 507 break; 508 509 default: 510 ans = nil; 511 assert(0); 512 } 513 return ans; 514 } 515 516 // Convert buf[0:n], Unicode characters, 517 // into an emalloc'd null-terminated string in character set chset. 518 // Use 0x80 for unconvertable characters. 519 uchar* 520 fromStr(Rune* buf, int n, int chset) 521 { 522 uchar* ans; 523 int i, lim, m; 524 Rune ch; 525 uchar* p; 526 uchar s[UTFmax]; 527 528 ans = nil; 529 switch(chset) { 530 case US_Ascii: 531 case ISO_8859_1: 532 ans = (uchar*)emalloc(n+1); 533 lim = (chset==US_Ascii)? 127 : 255; 534 for(i = 0; i < n; i++) { 535 ch = buf[i]; 536 if(ch > lim) 537 ch = 0x80; 538 ans[i] = ch; 539 } 540 ans[n] = 0; 541 break; 542 543 case UTF_8: 544 m = 0; 545 for(i = 0; i < n; i++) { 546 m += runetochar((char*)s, &buf[i]); 547 } 548 ans = (uchar*)emalloc(m+1); 549 p = ans; 550 for(i = 0; i < n; i++) 551 p += runetochar((char*)p, &buf[i]); 552 *p = 0; 553 break; 554 555 default: 556 assert(0); 557 } 558 return ans; 559 560 } 561 562 // Convert n to emalloc'd String. 563 Rune* 564 _ltoStr(int n) 565 { 566 int m; 567 uchar buf[20]; 568 569 m = snprint((char*)buf, sizeof(buf), "%d", n); 570 return toStr(buf, m, US_Ascii); 571 } 572