1 #include <u.h> 2 #include <libc.h> 3 #include <draw.h> 4 #include <html.h> 5 #include "impl.h" 6 7 Rune* whitespace = L" \t\n\r"; 8 Rune* notwhitespace = L"^ \t\n\r"; 9 10 // All lists start out like List structure. 11 // List itself can be used as list of int. 12 int 13 _listlen(List* l) 14 { 15 int n = 0; 16 17 while(l != nil) { 18 l = l->next; 19 n++; 20 } 21 return n; 22 } 23 24 // Cons 25 List* 26 _newlist(int val, List* rest) 27 { 28 List* ans; 29 30 ans = (List*)emalloc(sizeof(List)); 31 ans->val = val; 32 ans->next = rest; 33 return ans; 34 } 35 36 // Reverse a list in place 37 List* 38 _revlist(List* l) 39 { 40 List* newl; 41 List* nextl; 42 43 newl = nil; 44 while(l != nil) { 45 nextl = l->next; 46 l->next = newl; 47 newl = l; 48 l = nextl; 49 } 50 return newl; 51 } 52 53 // The next few routines take a "character class" as argument. 54 // e.g., "a-zA-Z", or "^ \t\n" 55 // (ranges indicated by - except in first position; 56 // ^ is first position means "not in" the following class) 57 58 // Splitl splits s[0:n] just before first character of class cl. 59 // Answers go in (p1, n1) and (p2, n2). 60 // If no split, the whole thing goes in the first component. 61 // Note: answers contain pointers into original string. 62 void 63 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 64 { 65 Rune* p; 66 67 p = _Strnclass(s, cl, n); 68 *p1 = s; 69 if(p == nil) { 70 *n1 = n; 71 *p2 = nil; 72 *n2 = 0; 73 } 74 else { 75 *p2 = p; 76 *n1 = p-s; 77 *n2 = n-*n1; 78 } 79 } 80 81 // Splitr splits s[0:n] just after last character of class cl. 82 // Answers go in (p1, n1) and (p2, n2). 83 // If no split, the whole thing goes in the last component. 84 // Note: answers contain pointers into original string. 85 void 86 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2) 87 { 88 Rune* p; 89 90 p = _Strnrclass(s, cl, n); 91 if(p == nil) { 92 *p1 = nil; 93 *n1 = 0; 94 *p2 = s; 95 *n2 = n; 96 } 97 else { 98 *p1 = s; 99 *p2 = p+1; 100 *n1 = *p2-s; 101 *n2 = n-*n1; 102 } 103 } 104 105 // Splitall splits s[0:n] into parts that are separated by characters from class cl. 106 // Each part will have nonzero length. 107 // At most alen parts are found, and pointers to their starts go into 108 // the strarr array, while their lengths go into the lenarr array. 109 // The return value is the number of parts found. 110 int 111 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen) 112 { 113 int i; 114 Rune* p; 115 Rune* q; 116 Rune* slast; 117 118 if(s == nil || n == 0) 119 return 0; 120 i = 0; 121 p = s; 122 slast = s+n; 123 while(p < slast && i < alen) { 124 while(p < slast && _inclass(*p, cl)) 125 p++; 126 if(p == slast) 127 break; 128 q = _Strnclass(p, cl, slast-p); 129 if(q == nil) 130 q = slast; 131 assert(q > p && q <= slast); 132 strarr[i] = p; 133 lenarr[i] = q-p; 134 i++; 135 p = q; 136 } 137 return i; 138 } 139 140 // Find part of s that excludes leading and trailing whitespace, 141 // and return that part in *pans (and its length in *panslen). 142 void 143 _trimwhite(Rune* s, int n, Rune** pans, int* panslen) 144 { 145 Rune* p; 146 Rune* q; 147 148 p = nil; 149 if(n > 0) { 150 p = _Strnclass(s, notwhitespace, n); 151 if(p != nil) { 152 q = _Strnrclass(s, notwhitespace, n); 153 assert(q != nil); 154 n = q+1-p; 155 } 156 } 157 *pans = p; 158 *panslen = n; 159 } 160 161 // _Strclass returns a pointer to the first element of s that is 162 // a member of class cl, nil if none. 163 Rune* 164 _Strclass(Rune* s, Rune* cl) 165 { 166 Rune* p; 167 168 for(p = s; *p != 0; p++) 169 if(_inclass(*p, cl)) 170 return p; 171 return nil; 172 } 173 174 // _Strnclass returns a pointer to the first element of s[0:n] that is 175 // a member of class cl, nil if none. 176 Rune* 177 _Strnclass(Rune* s, Rune* cl, int n) 178 { 179 Rune* p; 180 181 for(p = s; n-- && *p != 0; p++) 182 if(_inclass(*p, cl)) 183 return p; 184 return nil; 185 } 186 187 // _Strrclass returns a pointer to the last element of s that is 188 // a member of class cl, nil if none 189 Rune* 190 _Strrclass(Rune* s, Rune* cl) 191 { 192 Rune* p; 193 194 if(s == nil || *s == 0) 195 return nil; 196 p = s + runestrlen(s) - 1; 197 while(p >= s) { 198 if(_inclass(*p, cl)) 199 return p; 200 p--; 201 }; 202 return nil; 203 } 204 205 // _Strnrclass returns a pointer to the last element of s[0:n] that is 206 // a member of class cl, nil if none 207 Rune* 208 _Strnrclass(Rune* s, Rune* cl, int n) 209 { 210 Rune* p; 211 212 if(s == nil || *s == 0 || n == 0) 213 return nil; 214 p = s + n - 1; 215 while(p >= s) { 216 if(_inclass(*p, cl)) 217 return p; 218 p--; 219 }; 220 return nil; 221 } 222 223 // Is c in the class cl? 224 int 225 _inclass(Rune c, Rune* cl) 226 { 227 int n; 228 int ans; 229 int negate; 230 int i; 231 232 n = _Strlen(cl); 233 if(n == 0) 234 return 0; 235 ans = 0; 236 negate = 0; 237 if(cl[0] == '^') { 238 negate = 1; 239 cl++; 240 n--; 241 } 242 for(i = 0; i < n; i++) { 243 if(cl[i] == '-' && i > 0 && i < n - 1) { 244 if(c >= cl[i - 1] && c <= cl[i + 1]) { 245 ans = 1; 246 break; 247 } 248 i++; 249 } 250 else if(c == cl[i]) { 251 ans = 1; 252 break; 253 } 254 } 255 if(negate) 256 ans = !ans; 257 return ans; 258 } 259 260 // Is pre a prefix of s? 261 int 262 _prefix(Rune* pre, Rune* s) 263 { 264 int ns; 265 int n; 266 int k; 267 268 ns = _Strlen(s); 269 n = _Strlen(pre); 270 if(ns < n) 271 return 0; 272 for(k = 0; k < n; k++) { 273 if(pre[k] != s[k]) 274 return 0; 275 } 276 return 1; 277 } 278 279 // Number of runes in (null-terminated) s 280 int 281 _Strlen(Rune* s) 282 { 283 if(s == nil) 284 return 0; 285 return runestrlen(s); 286 } 287 288 // -1, 0, 1 as s1 is lexicographically less, equal greater than s2 289 int 290 _Strcmp(Rune *s1, Rune *s2) 291 { 292 if(s1 == nil) 293 return (s2 == nil || *s2 == 0) ? 0 : -1; 294 if(s2 == nil) 295 return (*s1 == 0) ? 0 : 1; 296 return runestrcmp(s1, s2); 297 } 298 299 // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). 300 // Also, do a case-insensitive match, assuming s2 301 // has no chars in [A-Z], only their lowercase versions. 302 // (This routine is used for in-place keyword lookup, where s2 is in a keyword 303 // list and s1 is some substring, possibly mixed-case, in a buffer.) 304 int 305 _Strncmpci(Rune *s1, int n1, Rune *s2) 306 { 307 Rune c1, c2; 308 309 for(;;) { 310 if(n1-- == 0) { 311 if(*s2 == 0) 312 return 0; 313 return -1; 314 } 315 c1 = *s1++; 316 c2 = *s2++; 317 if(c1 >= 'A' && c1 <= 'Z') 318 c1 = c1 - 'A' + 'a'; 319 if(c1 != c2) { 320 if(c1 > c2) 321 return 1; 322 return -1; 323 } 324 } 325 } 326 327 // emalloc and copy 328 Rune* 329 _Strdup(Rune* s) 330 { 331 if(s == nil) 332 return nil; 333 return _Strndup(s, runestrlen(s)); 334 } 335 336 // emalloc and copy n chars of s (assume s is at least that long), 337 // and add 0 terminator. 338 // Return nil if n==0. 339 Rune* 340 _Strndup(Rune* s, int n) 341 { 342 Rune* ans; 343 344 if(n <= 0) 345 return nil; 346 ans = _newstr(n); 347 memmove(ans, s, n*sizeof(Rune)); 348 ans[n] = 0; 349 return ans; 350 } 351 // emalloc enough room for n Runes, plus 1 null terminator. 352 // (Not initialized to anything.) 353 Rune* 354 _newstr(int n) 355 { 356 return (Rune*)emalloc((n+1)*sizeof(Rune)); 357 } 358 359 // emalloc and copy s+t 360 Rune* 361 _Strdup2(Rune* s, Rune* t) 362 { 363 int ns, nt; 364 Rune* ans; 365 Rune* p; 366 367 ns = _Strlen(s); 368 nt = _Strlen(t); 369 if(ns+nt == 0) 370 return nil; 371 ans = _newstr(ns+nt); 372 p = _Stradd(ans, s, ns); 373 p = _Stradd(p, t, nt); 374 *p = 0; 375 return ans; 376 } 377 378 // Return emalloc'd substring s[start:stop], 379 Rune* 380 _Strsubstr(Rune* s, int start, int stop) 381 { 382 Rune* t; 383 384 if(start == stop) 385 return nil; 386 t = _Strndup(s+start, stop-start); 387 return t; 388 } 389 390 // Copy n chars to s1 from s2, and return s1+n 391 Rune* 392 _Stradd(Rune* s1, Rune* s2, int n) 393 { 394 if(n == 0) 395 return s1; 396 memmove(s1, s2, n*sizeof(Rune)); 397 return s1+n; 398 } 399 400 // Like strtol, but converting from Rune* string 401 402 #define LONG_MAX 2147483647L 403 #define LONG_MIN -2147483648L 404 405 long 406 _Strtol(Rune* nptr, Rune** endptr, int base) 407 { 408 Rune* p; 409 long n, nn; 410 int c, ovfl, v, neg, ndig; 411 412 p = nptr; 413 neg = 0; 414 n = 0; 415 ndig = 0; 416 ovfl = 0; 417 418 /* 419 * White space 420 */ 421 for(;;p++){ 422 switch(*p){ 423 case ' ': 424 case '\t': 425 case '\n': 426 case '\f': 427 case '\r': 428 case '\v': 429 continue; 430 } 431 break; 432 } 433 434 /* 435 * Sign 436 */ 437 if(*p=='-' || *p=='+') 438 if(*p++ == '-') 439 neg = 1; 440 441 /* 442 * Base 443 */ 444 if(base==0){ 445 if(*p != '0') 446 base = 10; 447 else{ 448 base = 8; 449 if(p[1]=='x' || p[1]=='X'){ 450 p += 2; 451 base = 16; 452 } 453 } 454 }else if(base==16 && *p=='0'){ 455 if(p[1]=='x' || p[1]=='X') 456 p += 2; 457 }else if(base<0 || 36<base) 458 goto Return; 459 460 /* 461 * Non-empty sequence of digits 462 */ 463 for(;; p++,ndig++){ 464 c = *p; 465 v = base; 466 if('0'<=c && c<='9') 467 v = c - '0'; 468 else if('a'<=c && c<='z') 469 v = c - 'a' + 10; 470 else if('A'<=c && c<='Z') 471 v = c - 'A' + 10; 472 if(v >= base) 473 break; 474 nn = n*base + v; 475 if(nn < n) 476 ovfl = 1; 477 n = nn; 478 } 479 480 Return: 481 if(ndig == 0) 482 p = nptr; 483 if(endptr) 484 *endptr = p; 485 if(ovfl){ 486 if(neg) 487 return LONG_MIN; 488 return LONG_MAX; 489 } 490 if(neg) 491 return -n; 492 return n; 493 } 494 495 // Convert buf[0:n], bytes whose character set is chset, 496 // into a emalloc'd null-terminated Unicode string. 497 Rune* 498 toStr(uchar* buf, int n, int chset) 499 { 500 int i; 501 int m; 502 Rune ch; 503 Rune* ans; 504 505 switch(chset) { 506 case US_Ascii: 507 case ISO_8859_1: 508 ans = (Rune*)emalloc((n+1)*sizeof(Rune)); 509 for(i = 0; i < n; i++) 510 ans[i] = buf[i]; 511 ans[n] = 0; 512 break; 513 514 case UTF_8: 515 m = 0; 516 for(i = 0; i < n; ) { 517 i += chartorune(&ch, (char*)(buf+i)); 518 m++; 519 } 520 ans = (Rune*)emalloc((m+1)*sizeof(Rune)); 521 m = 0; 522 for(i = 0; i < n; ) { 523 i += chartorune(&ch, (char*)(buf+i)); 524 ans[m++] = ch; 525 } 526 ans[m] = 0; 527 break; 528 529 default: 530 ans = nil; 531 assert(0); 532 } 533 return ans; 534 } 535 536 // Convert buf[0:n], Unicode characters, 537 // into an emalloc'd null-terminated string in character set chset. 538 // Use 0x80 for unconvertable characters. 539 uchar* 540 fromStr(Rune* buf, int n, int chset) 541 { 542 uchar* ans; 543 int i, lim, m; 544 Rune ch; 545 uchar* p; 546 uchar s[UTFmax]; 547 548 ans = nil; 549 switch(chset) { 550 case US_Ascii: 551 case ISO_8859_1: 552 ans = (uchar*)emalloc(n+1); 553 lim = (chset==US_Ascii)? 127 : 255; 554 for(i = 0; i < n; i++) { 555 ch = buf[i]; 556 if(ch > lim) 557 ch = 0x80; 558 ans[i] = ch; 559 } 560 ans[n] = 0; 561 break; 562 563 case UTF_8: 564 m = 0; 565 for(i = 0; i < n; i++) { 566 m += runetochar((char*)s, &buf[i]); 567 } 568 ans = (uchar*)emalloc(m+1); 569 p = ans; 570 for(i = 0; i < n; i++) 571 p += runetochar((char*)p, &buf[i]); 572 *p = 0; 573 break; 574 575 default: 576 assert(0); 577 } 578 return ans; 579 580 } 581