1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <html.h>
5 #include "impl.h"
6
7 Rune* whitespace = L" \t\n\r";
8 Rune* notwhitespace = L"^ \t\n\r";
9
10 // All lists start out like List structure.
11 // List itself can be used as list of int.
12 int
_listlen(List * l)13 _listlen(List* l)
14 {
15 int n = 0;
16
17 while(l != nil) {
18 l = l->next;
19 n++;
20 }
21 return n;
22 }
23
24 // Cons
25 List*
_newlist(int val,List * rest)26 _newlist(int val, List* rest)
27 {
28 List* ans;
29
30 ans = (List*)emalloc(sizeof(List));
31 ans->val = val;
32 ans->next = rest;
33 return ans;
34 }
35
36 // Reverse a list in place
37 List*
_revlist(List * l)38 _revlist(List* l)
39 {
40 List* newl;
41 List* nextl;
42
43 newl = nil;
44 while(l != nil) {
45 nextl = l->next;
46 l->next = newl;
47 newl = l;
48 l = nextl;
49 }
50 return newl;
51 }
52
53 // The next few routines take a "character class" as argument.
54 // e.g., "a-zA-Z", or "^ \t\n"
55 // (ranges indicated by - except in first position;
56 // ^ is first position means "not in" the following class)
57
58 // Splitl splits s[0:n] just before first character of class cl.
59 // Answers go in (p1, n1) and (p2, n2).
60 // If no split, the whole thing goes in the first component.
61 // Note: answers contain pointers into original string.
62 void
_splitl(Rune * s,int n,Rune * cl,Rune ** p1,int * n1,Rune ** p2,int * n2)63 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
64 {
65 Rune* p;
66
67 p = _Strnclass(s, cl, n);
68 *p1 = s;
69 if(p == nil) {
70 *n1 = n;
71 *p2 = nil;
72 *n2 = 0;
73 }
74 else {
75 *p2 = p;
76 *n1 = p-s;
77 *n2 = n-*n1;
78 }
79 }
80
81 // Splitr splits s[0:n] just after last character of class cl.
82 // Answers go in (p1, n1) and (p2, n2).
83 // If no split, the whole thing goes in the last component.
84 // Note: answers contain pointers into original string.
85 void
_splitr(Rune * s,int n,Rune * cl,Rune ** p1,int * n1,Rune ** p2,int * n2)86 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
87 {
88 Rune* p;
89
90 p = _Strnrclass(s, cl, n);
91 if(p == nil) {
92 *p1 = nil;
93 *n1 = 0;
94 *p2 = s;
95 *n2 = n;
96 }
97 else {
98 *p1 = s;
99 *p2 = p+1;
100 *n1 = *p2-s;
101 *n2 = n-*n1;
102 }
103 }
104
105 // Splitall splits s[0:n] into parts that are separated by characters from class cl.
106 // Each part will have nonzero length.
107 // At most alen parts are found, and pointers to their starts go into
108 // the strarr array, while their lengths go into the lenarr array.
109 // The return value is the number of parts found.
110 int
_splitall(Rune * s,int n,Rune * cl,Rune ** strarr,int * lenarr,int alen)111 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
112 {
113 int i;
114 Rune* p;
115 Rune* q;
116 Rune* slast;
117
118 if(s == nil || n == 0)
119 return 0;
120 i = 0;
121 p = s;
122 slast = s+n;
123 while(p < slast && i < alen) {
124 while(p < slast && _inclass(*p, cl))
125 p++;
126 if(p == slast)
127 break;
128 q = _Strnclass(p, cl, slast-p);
129 if(q == nil)
130 q = slast;
131 assert(q > p && q <= slast);
132 strarr[i] = p;
133 lenarr[i] = q-p;
134 i++;
135 p = q;
136 }
137 return i;
138 }
139
140 // Find part of s that excludes leading and trailing whitespace,
141 // and return that part in *pans (and its length in *panslen).
142 void
_trimwhite(Rune * s,int n,Rune ** pans,int * panslen)143 _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
144 {
145 Rune* p;
146 Rune* q;
147
148 p = nil;
149 if(n > 0) {
150 p = _Strnclass(s, notwhitespace, n);
151 if(p != nil) {
152 q = _Strnrclass(s, notwhitespace, n);
153 assert(q != nil);
154 n = q+1-p;
155 }
156 }
157 *pans = p;
158 *panslen = n;
159 }
160
161 // _Strclass returns a pointer to the first element of s that is
162 // a member of class cl, nil if none.
163 Rune*
_Strclass(Rune * s,Rune * cl)164 _Strclass(Rune* s, Rune* cl)
165 {
166 Rune* p;
167
168 for(p = s; *p != 0; p++)
169 if(_inclass(*p, cl))
170 return p;
171 return nil;
172 }
173
174 // _Strnclass returns a pointer to the first element of s[0:n] that is
175 // a member of class cl, nil if none.
176 Rune*
_Strnclass(Rune * s,Rune * cl,int n)177 _Strnclass(Rune* s, Rune* cl, int n)
178 {
179 Rune* p;
180
181 for(p = s; n-- && *p != 0; p++)
182 if(_inclass(*p, cl))
183 return p;
184 return nil;
185 }
186
187 // _Strrclass returns a pointer to the last element of s that is
188 // a member of class cl, nil if none
189 Rune*
_Strrclass(Rune * s,Rune * cl)190 _Strrclass(Rune* s, Rune* cl)
191 {
192 Rune* p;
193
194 if(s == nil || *s == 0)
195 return nil;
196 p = s + runestrlen(s) - 1;
197 while(p >= s) {
198 if(_inclass(*p, cl))
199 return p;
200 p--;
201 };
202 return nil;
203 }
204
205 // _Strnrclass returns a pointer to the last element of s[0:n] that is
206 // a member of class cl, nil if none
207 Rune*
_Strnrclass(Rune * s,Rune * cl,int n)208 _Strnrclass(Rune* s, Rune* cl, int n)
209 {
210 Rune* p;
211
212 if(s == nil || *s == 0 || n == 0)
213 return nil;
214 p = s + n - 1;
215 while(p >= s) {
216 if(_inclass(*p, cl))
217 return p;
218 p--;
219 };
220 return nil;
221 }
222
223 // Is c in the class cl?
224 int
_inclass(Rune c,Rune * cl)225 _inclass(Rune c, Rune* cl)
226 {
227 int n;
228 int ans;
229 int negate;
230 int i;
231
232 n = _Strlen(cl);
233 if(n == 0)
234 return 0;
235 ans = 0;
236 negate = 0;
237 if(cl[0] == '^') {
238 negate = 1;
239 cl++;
240 n--;
241 }
242 for(i = 0; i < n; i++) {
243 if(cl[i] == '-' && i > 0 && i < n - 1) {
244 if(c >= cl[i - 1] && c <= cl[i + 1]) {
245 ans = 1;
246 break;
247 }
248 i++;
249 }
250 else if(c == cl[i]) {
251 ans = 1;
252 break;
253 }
254 }
255 if(negate)
256 ans = !ans;
257 return ans;
258 }
259
260 // Is pre a prefix of s?
261 int
_prefix(Rune * pre,Rune * s)262 _prefix(Rune* pre, Rune* s)
263 {
264 int ns;
265 int n;
266 int k;
267
268 ns = _Strlen(s);
269 n = _Strlen(pre);
270 if(ns < n)
271 return 0;
272 for(k = 0; k < n; k++) {
273 if(pre[k] != s[k])
274 return 0;
275 }
276 return 1;
277 }
278
279 // Number of runes in (null-terminated) s
280 int
_Strlen(Rune * s)281 _Strlen(Rune* s)
282 {
283 if(s == nil)
284 return 0;
285 return runestrlen(s);
286 }
287
288 // -1, 0, 1 as s1 is lexicographically less, equal greater than s2
289 int
_Strcmp(Rune * s1,Rune * s2)290 _Strcmp(Rune *s1, Rune *s2)
291 {
292 if(s1 == nil)
293 return (s2 == nil || *s2 == 0) ? 0 : -1;
294 if(s2 == nil)
295 return (*s1 == 0) ? 0 : 1;
296 return runestrcmp(s1, s2);
297 }
298
299 // Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
300 // Also, do a case-insensitive match, assuming s2
301 // has no chars in [A-Z], only their lowercase versions.
302 // (This routine is used for in-place keyword lookup, where s2 is in a keyword
303 // list and s1 is some substring, possibly mixed-case, in a buffer.)
304 int
_Strncmpci(Rune * s1,int n1,Rune * s2)305 _Strncmpci(Rune *s1, int n1, Rune *s2)
306 {
307 Rune c1, c2;
308
309 for(;;) {
310 if(n1-- == 0) {
311 if(*s2 == 0)
312 return 0;
313 return -1;
314 }
315 c1 = *s1++;
316 c2 = *s2++;
317 if(c1 >= 'A' && c1 <= 'Z')
318 c1 = c1 - 'A' + 'a';
319 if(c1 != c2) {
320 if(c1 > c2)
321 return 1;
322 return -1;
323 }
324 }
325 }
326
327 // emalloc and copy
328 Rune*
_Strdup(Rune * s)329 _Strdup(Rune* s)
330 {
331 if(s == nil)
332 return nil;
333 return _Strndup(s, runestrlen(s));
334 }
335
336 // emalloc and copy n chars of s (assume s is at least that long),
337 // and add 0 terminator.
338 // Return nil if n==0.
339 Rune*
_Strndup(Rune * s,int n)340 _Strndup(Rune* s, int n)
341 {
342 Rune* ans;
343
344 if(n <= 0)
345 return nil;
346 ans = _newstr(n);
347 memmove(ans, s, n*sizeof(Rune));
348 ans[n] = 0;
349 return ans;
350 }
351 // emalloc enough room for n Runes, plus 1 null terminator.
352 // (Not initialized to anything.)
353 Rune*
_newstr(int n)354 _newstr(int n)
355 {
356 return (Rune*)emalloc((n+1)*sizeof(Rune));
357 }
358
359 // emalloc and copy s+t
360 Rune*
_Strdup2(Rune * s,Rune * t)361 _Strdup2(Rune* s, Rune* t)
362 {
363 int ns, nt;
364 Rune* ans;
365 Rune* p;
366
367 ns = _Strlen(s);
368 nt = _Strlen(t);
369 if(ns+nt == 0)
370 return nil;
371 ans = _newstr(ns+nt);
372 p = _Stradd(ans, s, ns);
373 p = _Stradd(p, t, nt);
374 *p = 0;
375 return ans;
376 }
377
378 // Return emalloc'd substring s[start:stop],
379 Rune*
_Strsubstr(Rune * s,int start,int stop)380 _Strsubstr(Rune* s, int start, int stop)
381 {
382 Rune* t;
383
384 if(start == stop)
385 return nil;
386 t = _Strndup(s+start, stop-start);
387 return t;
388 }
389
390 // Copy n chars to s1 from s2, and return s1+n
391 Rune*
_Stradd(Rune * s1,Rune * s2,int n)392 _Stradd(Rune* s1, Rune* s2, int n)
393 {
394 if(n == 0)
395 return s1;
396 memmove(s1, s2, n*sizeof(Rune));
397 return s1+n;
398 }
399
400 // Like strtol, but converting from Rune* string
401
402 #define LONG_MAX 2147483647L
403 #define LONG_MIN -2147483648L
404
405 long
_Strtol(Rune * nptr,Rune ** endptr,int base)406 _Strtol(Rune* nptr, Rune** endptr, int base)
407 {
408 Rune* p;
409 long n, nn;
410 int c, ovfl, v, neg, ndig;
411
412 p = nptr;
413 neg = 0;
414 n = 0;
415 ndig = 0;
416 ovfl = 0;
417
418 /*
419 * White space
420 */
421 for(;;p++){
422 switch(*p){
423 case ' ':
424 case '\t':
425 case '\n':
426 case '\f':
427 case '\r':
428 case '\v':
429 continue;
430 }
431 break;
432 }
433
434 /*
435 * Sign
436 */
437 if(*p=='-' || *p=='+')
438 if(*p++ == '-')
439 neg = 1;
440
441 /*
442 * Base
443 */
444 if(base==0){
445 if(*p != '0')
446 base = 10;
447 else{
448 base = 8;
449 if(p[1]=='x' || p[1]=='X'){
450 p += 2;
451 base = 16;
452 }
453 }
454 }else if(base==16 && *p=='0'){
455 if(p[1]=='x' || p[1]=='X')
456 p += 2;
457 }else if(base<0 || 36<base)
458 goto Return;
459
460 /*
461 * Non-empty sequence of digits
462 */
463 for(;; p++,ndig++){
464 c = *p;
465 v = base;
466 if('0'<=c && c<='9')
467 v = c - '0';
468 else if('a'<=c && c<='z')
469 v = c - 'a' + 10;
470 else if('A'<=c && c<='Z')
471 v = c - 'A' + 10;
472 if(v >= base)
473 break;
474 nn = n*base + v;
475 if(nn < n)
476 ovfl = 1;
477 n = nn;
478 }
479
480 Return:
481 if(ndig == 0)
482 p = nptr;
483 if(endptr)
484 *endptr = p;
485 if(ovfl){
486 if(neg)
487 return LONG_MIN;
488 return LONG_MAX;
489 }
490 if(neg)
491 return -n;
492 return n;
493 }
494
495 // Convert buf[0:n], bytes whose character set is chset,
496 // into a emalloc'd null-terminated Unicode string.
497 Rune*
toStr(uchar * buf,int n,int chset)498 toStr(uchar* buf, int n, int chset)
499 {
500 int i;
501 int m;
502 Rune ch;
503 Rune* ans;
504
505 switch(chset) {
506 case US_Ascii:
507 case ISO_8859_1:
508 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
509 for(i = 0; i < n; i++)
510 ans[i] = buf[i];
511 ans[n] = 0;
512 break;
513
514 case UTF_8:
515 m = 0;
516 for(i = 0; i < n; ) {
517 i += chartorune(&ch, (char*)(buf+i));
518 m++;
519 }
520 ans = (Rune*)emalloc((m+1)*sizeof(Rune));
521 m = 0;
522 for(i = 0; i < n; ) {
523 i += chartorune(&ch, (char*)(buf+i));
524 ans[m++] = ch;
525 }
526 ans[m] = 0;
527 break;
528
529 default:
530 ans = nil;
531 assert(0);
532 }
533 return ans;
534 }
535
536 // Convert buf[0:n], Unicode characters,
537 // into an emalloc'd null-terminated string in character set chset.
538 // Use 0x80 for unconvertable characters.
539 uchar*
fromStr(Rune * buf,int n,int chset)540 fromStr(Rune* buf, int n, int chset)
541 {
542 uchar* ans;
543 int i, lim, m;
544 Rune ch;
545 uchar* p;
546 uchar s[UTFmax];
547
548 ans = nil;
549 switch(chset) {
550 case US_Ascii:
551 case ISO_8859_1:
552 ans = (uchar*)emalloc(n+1);
553 lim = (chset==US_Ascii)? 127 : 255;
554 for(i = 0; i < n; i++) {
555 ch = buf[i];
556 if(ch > lim)
557 ch = 0x80;
558 ans[i] = ch;
559 }
560 ans[n] = 0;
561 break;
562
563 case UTF_8:
564 m = 0;
565 for(i = 0; i < n; i++) {
566 m += runetochar((char*)s, &buf[i]);
567 }
568 ans = (uchar*)emalloc(m+1);
569 p = ans;
570 for(i = 0; i < n; i++)
571 p += runetochar((char*)p, &buf[i]);
572 *p = 0;
573 break;
574
575 default:
576 assert(0);
577 }
578 return ans;
579
580 }
581