1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <String.h> 5 #include <ctype.h> 6 #include <thread.h> 7 #include "wiki.h" 8 9 static Wpage* 10 mkwtxt(int type, char *text) 11 { 12 Wpage *w; 13 14 w = emalloc(sizeof(*w)); 15 w->type = type; 16 w->text = text; 17 return w; 18 } 19 20 /* 21 * turn runs of whitespace into single spaces, 22 * eliminate whitespace at beginning and end. 23 */ 24 char* 25 strcondense(char *s, int cutbegin) 26 { 27 char *r, *w, *es; 28 int inspace; 29 30 es = s+strlen(s); 31 inspace = cutbegin; 32 for(r=w=s; *r; r++){ 33 if(isspace(*r)){ 34 if(!inspace){ 35 inspace=1; 36 *w++ = ' '; 37 } 38 }else{ 39 inspace=0; 40 *w++ = *r; 41 } 42 } 43 assert(w <= es); 44 if(inspace && w>s){ 45 --w; 46 *w = '\0'; 47 } 48 else 49 *w = '\0'; 50 return s; 51 } 52 53 /* 54 * turn runs of Wplain into single Wplain. 55 */ 56 static Wpage* 57 wcondense(Wpage *wtxt) 58 { 59 Wpage *ow, *w; 60 61 for(w=wtxt; w; ){ 62 if(w->type == Wplain) 63 strcondense(w->text, 1); 64 65 if(w->type != Wplain || w->next==nil 66 || w->next->type != Wplain){ 67 w=w->next; 68 continue; 69 } 70 71 w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1); 72 strcat(w->text, " "); 73 strcat(w->text, w->next->text); 74 75 free(w->next->text); 76 ow = w->next; 77 w->next = w->next->next; 78 free(ow); 79 } 80 return wtxt; 81 } 82 83 /* 84 * Parse a link, without the brackets. 85 */ 86 static Wpage* 87 mklink(char *s) 88 { 89 char *q; 90 Wpage *w; 91 92 for(q=s; *q && *q != '|'; q++) 93 ; 94 95 if(*q == '\0'){ 96 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 97 w->url = nil; 98 }else{ 99 *q = '\0'; 100 w = mkwtxt(Wlink, estrdup(strcondense(s, 1))); 101 w->url = estrdup(strcondense(q+1, 1)); 102 } 103 return w; 104 } 105 106 /* 107 * Parse Wplains, inserting Wlink nodes where appropriate. 108 */ 109 static Wpage* 110 wlink(Wpage *wtxt) 111 { 112 char *p, *q, *r, *s; 113 Wpage *w, *nw; 114 115 for(w=wtxt; w; w=nw){ 116 nw = w->next; 117 if(w->type != Wplain) 118 continue; 119 while(w->text[0]){ 120 p = w->text; 121 for(q=p; *q && *q != '['; q++) 122 ; 123 if(*q == '\0') 124 break; 125 for(r=q; *r && *r != ']'; r++) 126 ; 127 if(*r == '\0') 128 break; 129 *q = '\0'; 130 *r = '\0'; 131 s = w->text; 132 w->text = estrdup(w->text); 133 w->next = mklink(q+1); 134 w = w->next; 135 w->next = mkwtxt(Wplain, estrdup(r+1)); 136 free(s); 137 w = w->next; 138 w->next = nw; 139 } 140 assert(w->next == nw); 141 } 142 return wtxt; 143 } 144 145 static int 146 ismanchar(int c) 147 { 148 return ('a' <= c && c <= 'z') 149 || ('A' <= c && c <= 'Z') 150 || ('0' <= c && c <= '9') 151 || c=='_' || c=='-' || c=='.' || c=='/' 152 || (c < 0); /* UTF */ 153 } 154 155 static Wpage* 156 findmanref(char *p, char **beginp, char **endp) 157 { 158 char *q, *r; 159 Wpage *w; 160 161 q=p; 162 for(;;){ 163 for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++) 164 ; 165 if(*q == '\0') 166 break; 167 for(r=q; r>p && ismanchar(r[-1]); r--) 168 ; 169 if(r==q){ 170 q += 3; 171 continue; 172 } 173 *q = '\0'; 174 w = mkwtxt(Wman, estrdup(r)); 175 *beginp = r; 176 *q = '('; 177 w->section = q[1]-'0'; 178 *endp = q+3; 179 return w; 180 } 181 return nil; 182 } 183 184 /* 185 * Parse Wplains, looking for man page references. 186 * This should be done by using a plumb(6)-style 187 * control file rather than hard-coding things here. 188 */ 189 static Wpage* 190 wman(Wpage *wtxt) 191 { 192 char *q, *r; 193 Wpage *w, *mw, *nw; 194 195 for(w=wtxt; w; w=nw){ 196 nw = w->next; 197 if(w->type != Wplain) 198 continue; 199 while(w->text[0]){ 200 if((mw = findmanref(w->text, &q, &r)) == nil) 201 break; 202 *q = '\0'; 203 w->next = mw; 204 w = w->next; 205 w->next = mkwtxt(Wplain, estrdup(r)); 206 w = w->next; 207 w->next = nw; 208 } 209 assert(w->next == nw); 210 } 211 return wtxt; 212 } 213 214 static char *lower = "abcdefghijklmnopqrstuvwxyz"; 215 static char *upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 216 Wpage* 217 Brdpage(char *(*rdline)(void*,int), void *b) 218 { 219 char *p; 220 int waspara; 221 Wpage *w, **pw; 222 223 w = nil; 224 pw = &w; 225 waspara = 1; 226 while((p = rdline(b, '\n')) != nil){ 227 if(p[0] != '!') 228 p = strcondense(p, 1); 229 if(p[0] == '\0'){ 230 if(waspara==0){ 231 waspara=1; 232 *pw = mkwtxt(Wpara, nil); 233 pw = &(*pw)->next; 234 } 235 continue; 236 } 237 waspara = 0; 238 switch(p[0]){ 239 case '*': 240 *pw = mkwtxt(Wbullet, nil); 241 pw = &(*pw)->next; 242 *pw = mkwtxt(Wplain, estrdup(p+1)); 243 pw = &(*pw)->next; 244 break; 245 case '!': 246 *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1)); 247 pw = &(*pw)->next; 248 break; 249 default: 250 if(strpbrk(p, lower)==nil && strpbrk(p, upper)){ 251 *pw = mkwtxt(Wheading, estrdup(p)); 252 pw = &(*pw)->next; 253 continue; 254 } 255 *pw = mkwtxt(Wplain, estrdup(p)); 256 pw = &(*pw)->next; 257 break; 258 } 259 } 260 if(w == nil) 261 werrstr("empty page"); 262 263 *pw = nil; 264 w = wcondense(w); 265 w = wlink(w); 266 w = wman(w); 267 268 return w; 269 } 270 271 void 272 printpage(Wpage *w) 273 { 274 for(; w; w=w->next){ 275 switch(w->type){ 276 case Wpara: 277 print("para\n"); 278 break; 279 case Wheading: 280 print("heading '%s'\n", w->text); 281 break; 282 case Wbullet: 283 print("bullet\n"); 284 break; 285 case Wlink: 286 print("link '%s' '%s'\n", w->text, w->url); 287 break; 288 case Wman: 289 print("man %d %s\n", w->section, w->text); 290 break; 291 case Wplain: 292 print("plain '%s'\n", w->text); 293 break; 294 case Wpre: 295 print("pre '%s'\n", w->text); 296 break; 297 } 298 } 299 } 300