1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <String.h> 5 #include <ctype.h> 6 #include <thread.h> 7 #include "wiki.h" 8 9 static Wpage* 10 mkwtxt(int type, char *text) 11 { 12 Wpage *w; 13 14 w = emalloc(sizeof(*w)); 15 w->type = type; 16 w->text = text; 17 return w; 18 } 19 20 /* 21 * turn runs of whitespace into single spaces, 22 * eliminate whitespace at beginning and end. 23 */ 24 char* 25 strcondense(char *s, int cutbegin) 26 { 27 char *r, *w, *es; 28 int inspace; 29 30 es = s+strlen(s); 31 inspace = cutbegin; 32 for(r=w=s; *r; r++){ 33 if(isspace(*r)){ 34 if(!inspace){ 35 inspace=1; 36 *w++ = ' '; 37 } 38 }else{ 39 inspace=0; 40 *w++ = *r; 41 } 42 } 43 assert(w <= es); 44 if(inspace && w>s){ 45 --w; 46 *w = '\0'; 47 } 48 else 49 *w = '\0'; 50 return s; 51 } 52 53 /* 54 * turn runs of Wplain into single Wplain. 55 */ 56 static Wpage* 57 wcondense(Wpage *wtxt) 58 { 59 Wpage *ow, *w; 60 61 for(w=wtxt; w; ){ 62 if(w->type == Wplain) 63 strcondense(w->text, 1); 64 65 if(w->type != Wplain || w->next==nil 66 || w->next->type != Wplain){ 67 w=w->next; 68 continue; 69 } 70 71 w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1); 72 strcat(w->text, " "); 73 strcat(w->text, w->next->text); 74 75 free(w->next->text); 76 ow = w->next; 77 w->next = w->next->next; 78 free(ow); 79 } 80 return wtxt; 81 } 82 83 /* 84 * Parse a link, without the brackets. 85 */ 86 static Wpage* 87 mklink(char *s) 88 { 89 char *q; 90 Wpage *w; 91 92 for(q=s; *q && *q != '|'; q++) 93 ; 94 95 if(*q == '\0'){ 96 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 97 w->url = nil; 98 }else{ 99 *q = '\0'; 100 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 101 w->url = estrdup(strcondense(q+1, 1)); 102 } 103 return w; 104 } 105 106 /* 107 * Parse Wplains, inserting Wlink nodes where appropriate. 108 */ 109 static Wpage* 110 wlink(Wpage *wtxt) 111 { 112 char *p, *q, *r, *s; 113 Wpage *w, *nw; 114 115 for(w=wtxt; w; w=nw){ 116 nw = w->next; 117 if(w->type != Wplain) 118 continue; 119 while(w->text[0]){ 120 p = w->text; 121 for(q=p; *q && *q != '['; q++) 122 ; 123 if(*q == '\0') 124 break; 125 for(r=q; *r && *r != ']'; r++) 126 ; 127 if(*r == '\0') 128 break; 129 *q = '\0'; 130 *r = '\0'; 131 s = w->text; 132 w->text = estrdup(w->text); 133 w->next = mklink(q+1); 134 w = w->next; 135 w->next = mkwtxt(Wplain, estrdup(r+1)); 136 free(s); 137 w = w->next; 138 w->next = nw; 139 } 140 assert(w->next == nw); 141 } 142 return wtxt; 143 } 144 145 static int 146 ismanchar(int c) 147 { 148 return ('a' <= c && c <= 'z') 149 || ('A' <= c && c <= 'Z') 150 || ('0' <= c && c <= '9') 151 || c=='_' || c=='-' || c=='.' || c=='/' 152 || (c < 0); /* UTF */ 153 } 154 155 static Wpage* 156 findmanref(char *p, char **beginp, char **endp) 157 { 158 char *q, *r; 159 Wpage *w; 160 161 q=p; 162 for(;;){ 163 for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++) 164 ; 165 if(*q == '\0') 166 break; 167 for(r=q; r>p && ismanchar(r[-1]); r--) 168 ; 169 if(r==q){ 170 q += 3; 171 continue; 172 } 173 *q = '\0'; 174 w = mkwtxt(Wman, estrdup(r)); 175 *beginp = r; 176 *q = '('; 177 w->section = q[1]-'0'; 178 *endp = q+3; 179 return w; 180 } 181 return nil; 182 } 183 184 /* 185 * Parse Wplains, looking for man page references. 186 * This should be done by using a plumb(6)-style 187 * control file rather than hard-coding things here. 188 */ 189 static Wpage* 190 wman(Wpage *wtxt) 191 { 192 char *q, *r; 193 Wpage *w, *mw, *nw; 194 195 for(w=wtxt; w; w=nw){ 196 nw = w->next; 197 if(w->type != Wplain) 198 continue; 199 while(w->text[0]){ 200 if((mw = findmanref(w->text, &q, &r)) == nil) 201 break; 202 *q = '\0'; 203 w->next = mw; 204 w = w->next; 205 w->next = mkwtxt(Wplain, estrdup(r)); 206 w = w->next; 207 w->next = nw; 208 } 209 assert(w->next == nw); 210 } 211 return wtxt; 212 } 213 214 static int isheading(char *p) { 215 Rune r; 216 int hasupper=0; 217 while(*p) { 218 p+=chartorune(&r,p); 219 if(isupperrune(r)) 220 hasupper=1; 221 else if(islowerrune(r)) 222 return 0; 223 } 224 return hasupper; 225 } 226 227 Wpage* 228 Brdpage(char *(*rdline)(void*,int), void *b) 229 { 230 char *p, *c; 231 int waspara; 232 Wpage *w, **pw; 233 234 w = nil; 235 pw = &w; 236 waspara = 1; 237 while((p = rdline(b, '\n')) != nil){ 238 if(p[0] != '!') 239 p = strcondense(p, 1); 240 if(p[0] == '\0'){ 241 if(waspara==0){ 242 waspara=1; 243 *pw = mkwtxt(Wpara, nil); 244 pw = &(*pw)->next; 245 } 246 continue; 247 } 248 waspara = 0; 249 switch(p[0]){ 250 case '*': 251 *pw = mkwtxt(Wbullet, nil); 252 pw = &(*pw)->next; 253 *pw = mkwtxt(Wplain, estrdup(p+1)); 254 pw = &(*pw)->next; 255 break; 256 case '!': 257 *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1)); 258 pw = &(*pw)->next; 259 break; 260 case '-': 261 for(c = p; *c != '\0'; c++) { 262 if(*c != '-') { 263 c = p; 264 break; 265 } 266 } 267 268 if( (c-p) > 4) { 269 *pw = mkwtxt(Whr, nil); 270 pw = &(*pw)->next; 271 break; 272 } 273 /* else fall thru */ 274 default: 275 if(isheading(p)){ 276 *pw = mkwtxt(Wheading, estrdup(p)); 277 pw = &(*pw)->next; 278 continue; 279 } 280 *pw = mkwtxt(Wplain, estrdup(p)); 281 pw = &(*pw)->next; 282 break; 283 } 284 } 285 if(w == nil) 286 werrstr("empty page"); 287 288 *pw = nil; 289 w = wcondense(w); 290 w = wlink(w); 291 w = wman(w); 292 293 return w; 294 } 295 296 void 297 printpage(Wpage *w) 298 { 299 for(; w; w=w->next){ 300 switch(w->type){ 301 case Wpara: 302 print("para\n"); 303 break; 304 case Wheading: 305 print("heading '%s'\n", w->text); 306 break; 307 case Wbullet: 308 print("bullet\n"); 309 break; 310 case Wlink: 311 print("link '%s' '%s'\n", w->text, w->url); 312 break; 313 case Wman: 314 print("man %d %s\n", w->section, w->text); 315 break; 316 case Wplain: 317 print("plain '%s'\n", w->text); 318 break; 319 case Whr: 320 print("hr\n"); 321 break; 322 case Wpre: 323 print("pre '%s'\n", w->text); 324 break; 325 } 326 } 327 } 328