1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <String.h> 5 #include <ctype.h> 6 #include <thread.h> 7 #include "wiki.h" 8 9 static Wpage* 10 mkwtxt(int type, char *text) 11 { 12 Wpage *w; 13 14 w = emalloc(sizeof(*w)); 15 w->type = type; 16 w->text = text; 17 setmalloctag(w, getcallerpc(&type)); 18 return w; 19 } 20 21 /* 22 * turn runs of whitespace into single spaces, 23 * eliminate whitespace at beginning and end. 24 */ 25 char* 26 strcondense(char *s, int cutbegin) 27 { 28 char *r, *w, *es; 29 int inspace; 30 31 es = s+strlen(s); 32 inspace = cutbegin; 33 for(r=w=s; *r; r++){ 34 if(isspace(*r)){ 35 if(!inspace){ 36 inspace=1; 37 *w++ = ' '; 38 } 39 }else{ 40 inspace=0; 41 *w++ = *r; 42 } 43 } 44 assert(w <= es); 45 if(inspace && w>s){ 46 --w; 47 *w = '\0'; 48 } 49 else 50 *w = '\0'; 51 return s; 52 } 53 54 /* 55 * turn runs of Wplain into single Wplain. 56 */ 57 static Wpage* 58 wcondense(Wpage *wtxt) 59 { 60 Wpage *ow, *w; 61 62 for(w=wtxt; w; ){ 63 if(w->type == Wplain) 64 strcondense(w->text, 1); 65 66 if(w->type != Wplain || w->next==nil 67 || w->next->type != Wplain){ 68 w=w->next; 69 continue; 70 } 71 72 w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1); 73 strcat(w->text, " "); 74 strcat(w->text, w->next->text); 75 76 free(w->next->text); 77 ow = w->next; 78 w->next = w->next->next; 79 free(ow); 80 } 81 return wtxt; 82 } 83 84 /* 85 * Parse a link, without the brackets. 86 */ 87 static Wpage* 88 mklink(char *s) 89 { 90 char *q; 91 Wpage *w; 92 93 for(q=s; *q && *q != '|'; q++) 94 ; 95 96 if(*q == '\0'){ 97 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 98 w->url = nil; 99 }else{ 100 *q = '\0'; 101 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 102 w->url = estrdup(strcondense(q+1, 1)); 103 } 104 setmalloctag(w, getcallerpc(&s)); 105 return w; 106 } 107 108 /* 109 * Parse Wplains, inserting Wlink nodes where appropriate. 110 */ 111 static Wpage* 112 wlink(Wpage *wtxt) 113 { 114 char *p, *q, *r, *s; 115 Wpage *w, *nw; 116 117 for(w=wtxt; w; w=nw){ 118 nw = w->next; 119 if(w->type != Wplain) 120 continue; 121 while(w->text[0]){ 122 p = w->text; 123 for(q=p; *q && *q != '['; q++) 124 ; 125 if(*q == '\0') 126 break; 127 for(r=q; *r && *r != ']'; r++) 128 ; 129 if(*r == '\0') 130 break; 131 *q = '\0'; 132 *r = '\0'; 133 s = w->text; 134 w->text = estrdup(w->text); 135 w->next = mklink(q+1); 136 w = w->next; 137 w->next = mkwtxt(Wplain, estrdup(r+1)); 138 free(s); 139 w = w->next; 140 w->next = nw; 141 } 142 assert(w->next == nw); 143 } 144 return wtxt; 145 } 146 147 static int 148 ismanchar(int c) 149 { 150 return ('a' <= c && c <= 'z') 151 || ('A' <= c && c <= 'Z') 152 || ('0' <= c && c <= '9') 153 || c=='_' || c=='-' || c=='.' || c=='/' 154 || (c < 0); /* UTF */ 155 } 156 157 static Wpage* 158 findmanref(char *p, char **beginp, char **endp) 159 { 160 char *q, *r; 161 Wpage *w; 162 163 q=p; 164 for(;;){ 165 for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++) 166 ; 167 if(*q == '\0') 168 break; 169 for(r=q; r>p && ismanchar(r[-1]); r--) 170 ; 171 if(r==q){ 172 q += 3; 173 continue; 174 } 175 *q = '\0'; 176 w = mkwtxt(Wman, estrdup(r)); 177 *beginp = r; 178 *q = '('; 179 w->section = q[1]-'0'; 180 *endp = q+3; 181 setmalloctag(w, getcallerpc(&p)); 182 return w; 183 } 184 return nil; 185 } 186 187 /* 188 * Parse Wplains, looking for man page references. 189 * This should be done by using a plumb(6)-style 190 * control file rather than hard-coding things here. 191 */ 192 static Wpage* 193 wman(Wpage *wtxt) 194 { 195 char *q, *r; 196 Wpage *w, *mw, *nw; 197 198 for(w=wtxt; w; w=nw){ 199 nw = w->next; 200 if(w->type != Wplain) 201 continue; 202 while(w->text[0]){ 203 if((mw = findmanref(w->text, &q, &r)) == nil) 204 break; 205 *q = '\0'; 206 w->next = mw; 207 w = w->next; 208 w->next = mkwtxt(Wplain, estrdup(r)); 209 w = w->next; 210 w->next = nw; 211 } 212 assert(w->next == nw); 213 } 214 return wtxt; 215 } 216 217 static int isheading(char *p) { 218 Rune r; 219 int hasupper=0; 220 while(*p) { 221 p+=chartorune(&r,p); 222 if(isupperrune(r)) 223 hasupper=1; 224 else if(islowerrune(r)) 225 return 0; 226 } 227 return hasupper; 228 } 229 230 Wpage* 231 Brdpage(char *(*rdline)(void*,int), void *b) 232 { 233 char *p, *c; 234 int waspara; 235 Wpage *w, **pw; 236 237 w = nil; 238 pw = &w; 239 waspara = 1; 240 while((p = rdline(b, '\n')) != nil){ 241 if(p[0] != '!') 242 p = strcondense(p, 1); 243 if(p[0] == '\0'){ 244 if(waspara==0){ 245 waspara=1; 246 *pw = mkwtxt(Wpara, nil); 247 pw = &(*pw)->next; 248 } 249 continue; 250 } 251 waspara = 0; 252 switch(p[0]){ 253 case '*': 254 *pw = mkwtxt(Wbullet, nil); 255 pw = &(*pw)->next; 256 *pw = mkwtxt(Wplain, estrdup(p+1)); 257 pw = &(*pw)->next; 258 break; 259 case '!': 260 *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1)); 261 pw = &(*pw)->next; 262 break; 263 case '-': 264 for(c = p; *c != '\0'; c++) { 265 if(*c != '-') { 266 c = p; 267 break; 268 } 269 } 270 271 if( (c-p) > 4) { 272 *pw = mkwtxt(Whr, nil); 273 pw = &(*pw)->next; 274 break; 275 } 276 /* else fall thru */ 277 default: 278 if(isheading(p)){ 279 *pw = mkwtxt(Wheading, estrdup(p)); 280 pw = &(*pw)->next; 281 continue; 282 } 283 *pw = mkwtxt(Wplain, estrdup(p)); 284 pw = &(*pw)->next; 285 break; 286 } 287 } 288 if(w == nil) 289 werrstr("empty page"); 290 291 *pw = nil; 292 w = wcondense(w); 293 w = wlink(w); 294 w = wman(w); 295 setmalloctag(w, getcallerpc(&rdline)); 296 297 return w; 298 } 299 300 void 301 printpage(Wpage *w) 302 { 303 for(; w; w=w->next){ 304 switch(w->type){ 305 case Wpara: 306 print("para\n"); 307 break; 308 case Wheading: 309 print("heading '%s'\n", w->text); 310 break; 311 case Wbullet: 312 print("bullet\n"); 313 break; 314 case Wlink: 315 print("link '%s' '%s'\n", w->text, w->url); 316 break; 317 case Wman: 318 print("man %d %s\n", w->section, w->text); 319 break; 320 case Wplain: 321 print("plain '%s'\n", w->text); 322 break; 323 case Whr: 324 print("hr\n"); 325 break; 326 case Wpre: 327 print("pre '%s'\n", w->text); 328 break; 329 } 330 } 331 } 332